1 /**
2 * A GUIGraphics is the interface between a plugin client and a IWindow.
3 *
4 * Copyright: Copyright Auburn Sounds 2015 and later.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 * Authors:   Guillaume Piolat
7 */
8 module dplug.gui.graphics;
9 
10 import core.atomic;
11 import core.stdc.stdio;
12 
13 import inteli.emmintrin;
14 
15 import dplug.core.math;
16 import dplug.core.thread;
17 
18 import dplug.client.client;
19 import dplug.client.graphics;
20 import dplug.client.daw;
21 
22 import dplug.window.window;
23 
24 import dplug.graphics.mipmap;
25 
26 import dplug.gui.boxlist;
27 import dplug.gui.context;
28 import dplug.gui.element;
29 import dplug.gui.compositor;
30 import dplug.gui.legacypbr;
31 import dplug.gui.sizeconstraints;
32 import dplug.gui.profiler;
33 
34 /// In the whole package:
/// The diffuse map contains:
///   RGBA = red/green/blue/emissiveness
/// The depth map contains depth (0 being lowest, 65535 highest)
38 /// The material map contains:
39 ///   RGBA = roughness / metalness / specular / unused
40 
41 alias RMSP = RGBA; // reminder
42 
43 // Uncomment to enter the marvellous world of dirty rectangles.
44 //debug = resizing;
45 
46 // A GUIGraphics is the interface between a plugin client and a IWindow.
47 // It is also an UIElement and the root element of the plugin UI hierarchy.
48 // You have to derive it to have a GUI.
49 // It dispatches window events to the GUI hierarchy.
50 class GUIGraphics : UIElement, IGraphics
51 {
52 nothrow:
53 @nogc:
54 
    // Maximum width and height of a tile when doing the expensive PBR compositing step
    // (presumably in pixels -- the code consuming these constants is not in this part of the file).
    // Difficult trade-off in general: not launching threads (one tile) might be better on low-powered
    // devices and when a large number of UIs are opened.
    // But: having too large tiles also makes a visible delay when operating a single UI, even with only two threads.
    enum PBR_TILE_MAX_WIDTH = 64;
    enum PBR_TILE_MAX_HEIGHT = 64;
61 
62     this(SizeConstraints sizeConstraints, UIFlags flags)
63     {
64         _sizeConstraints = sizeConstraints;
65 
66         _uiContext = mallocNew!UIContext(this);
67         super(_uiContext, flags);
68 
69         _windowListener = mallocNew!WindowListener(this);
70 
71         _window = null;
72 
73         // Find what size the UI should at first opening.
74         _sizeConstraints.suggestDefaultSize(&_currentUserWidth, &_currentUserHeight);
75         _currentLogicalWidth  = _currentUserWidth;
76         _currentLogicalHeight = _currentUserHeight;
77         _desiredLogicalWidth  = _currentUserWidth;
78         _desiredLogicalHeight = _currentUserHeight;
79 
80         int numThreads = 0; // auto
81 
82         // Was lowered to 2 in October 2018 to save CPU usage.
83         // Now in Jan 2023, increased to 3 to have a bit smoother PBR.
84         // FUTURE: could make that 4 eventually, see Issue #752. This has minimal memory and CPU 
85         // costs, but is worse on slower plugins.
86         int maxThreads = 3;
87         _threadPool = mallocNew!ThreadPool(numThreads, maxThreads);
88 
89         // Build the compositor
90         {
91             CompositorCreationContext compositorContext;
92             compositorContext.threadPool = _threadPool;
93             _compositor = buildCompositor(&compositorContext);
94         }
95 
96         _rectsToUpdateDisjointedRaw = makeVec!box2i;
97         _rectsToUpdateDisjointedPBR = makeVec!box2i;
98         _rectsTemp = makeVec!box2i;
99 
100         _updateRectScratch[0] = makeVec!box2i;
101         _updateRectScratch[1] = makeVec!box2i;
102 
103         _rectsToComposite = makeVec!box2i;
104         _rectsToCompositeDisjointed = makeVec!box2i;
105         _rectsToCompositeDisjointedTiled = makeVec!box2i;
106 
107         _rectsToDisplay = makeVec!box2i;
108         _rectsToDisplayDisjointed = makeVec!box2i;
109 
110         _rectsToResize = makeVec!box2i;
111         _rectsToResizeDisjointed = makeVec!box2i;
112 
113         _elemsToDrawRaw = makeVec!UIElement;
114         _elemsToDrawPBR = makeVec!UIElement;
115         _sortScratchBuf = makeVec!UIElement;
116 
117         _diffuseMap = mallocNew!(Mipmap!RGBA)();
118         _materialMap = mallocNew!(Mipmap!RGBA)();
119         _depthMap = mallocNew!(Mipmap!L16)();
120 
121         _compositedBuffer = mallocNew!(OwnedImage!RGBA)();
122         _renderedBuffer = mallocNew!(OwnedImage!RGBA)();
123     }
124 
125     // Don't like the default rendering? Override this function and make another compositor.
126     ICompositor buildCompositor(CompositorCreationContext* context)
127     {
128         return mallocNew!PBRCompositor(context);
129     }
130 
    /// Want a screenshot? Want to generate a mesh or a voxel out of your render?
    /// Override this function and call `IUIContext.requestUIScreenshot()`.
    /// The default implementation does nothing.
    ///
    /// Params:
    ///     finalRender = The output, as shown to the plugin user.
    ///     pixelFormat = Pixel format of `finalRender`.
    ///                   pixelFormat == 0 if pixel format is RGBA8
    ///                   pixelFormat == 1 if pixel format is BGRA8
    ///                   pixelFormat == 2 if pixel format is ARGB8
    ///                   You must support all three, sorry.
    ///     diffuseMap = The PBR diffuse map.
    ///     depthMap = The PBR depth map.
    ///     materialMap = The PBR material map.
    ///
    /// All maps have the same dimension, which is the logical pixel size. 
    /// Warning: nothing to do with the Screencap key, it doesn't get triggered like that.
    void onScreenshot(ImageRef!RGBA finalRender,    // the output, as shown to the plugin user
                      WindowPixelFormat pixelFormat,// pixel format of `finalRender`, see above
                      ImageRef!RGBA diffuseMap,     // the PBR diffuse map
                      ImageRef!L16 depthMap,        // the PBR depth map
                      ImageRef!RGBA materialMap)    // the PBR material map
    {
        // override this to take programmatic screenshots
        // eg: generate a .vox, .png, etc.
    }
149 
150     final ICompositor compositor()
151     {
152         return _compositor;
153     }
154 
155     ~this()
156     {
157         closeUI();
158         _uiContext.destroyFree();
159 
160         _threadPool.destroyFree();
161 
162         _compositor.destroyFree();
163         _diffuseMap.destroyFree();
164         _materialMap.destroyFree();
165         _depthMap.destroyFree();
166 
167         _windowListener.destroyFree();
168 
169         destroyFree(_compositedBuffer);
170         destroyFree(_renderedBuffer);
171 
172         alignedFree(_resizedBuffer, 16);
173     }
174 
175     // <IGraphics implementation>
176 
177     override void* openUI(void* parentInfo,
178                           void* controlInfo,
179                           IClient client,
180                           GraphicsBackend backend)
181     {
182         _client = client;
183 
184         WindowBackend wbackend = void;
185         final switch(backend)
186         {
187             case GraphicsBackend.autodetect: wbackend = WindowBackend.autodetect; break;
188             case GraphicsBackend.win32: wbackend = WindowBackend.win32; break;
189             case GraphicsBackend.cocoa: wbackend = WindowBackend.cocoa; break;
190             case GraphicsBackend.x11: wbackend = WindowBackend.x11; break;
191         }
192 
193         position = box2i(0, 0, _currentUserWidth, _currentUserHeight);
194 
195         // Sets the whole UI dirty.
196         // This needs to be done _before_ window creation, else there could be a race
197         // displaying partial updates to the UI.
198         setDirtyWhole(UILayer.allLayers);
199 
200         // We create this window each time.
201         _window = createWindow(WindowUsage.plugin, parentInfo, controlInfo, _windowListener, wbackend, _currentLogicalWidth, _currentLogicalHeight);
202 
203         version(Dplug_ProfileUI) profiler.category("ui").instant("Open UI");
204 
205         return _window.systemHandle();
206     }
207 
208     override void closeUI()
209     {
210         // Destroy window.
211         if (_window !is null)
212         {
213             version(Dplug_ProfileUI)
214             {
215                 profiler.category("ui").instant("Close UI");
216             }
217 
218             _window.destroyFree();
219             _window = null;
220         }
221         _client = null;
222     }
223 
224     override void getGUISize(int* widthLogicalPixels, int* heightLogicalPixels)
225     {
226         *widthLogicalPixels  = _currentLogicalWidth;
227         *heightLogicalPixels = _currentLogicalHeight;
228     }
229 
230     override void getDesiredGUISize(int* widthLogicalPixels, int* heightLogicalPixels)
231     {
232         *widthLogicalPixels  = _desiredLogicalWidth;
233         *heightLogicalPixels = _desiredLogicalHeight;
234     }
235 
236     override bool isResizeable()
237     {
238         return isUIResizable();
239     }
240 
241     override bool isAspectRatioPreserved()
242     {
243         if (!isUIResizable()) return false;
244         return _sizeConstraints.preserveAspectRatio();
245     }
246 
247     override int[2] getPreservedAspectRatio()
248     {
249         return _sizeConstraints.aspectRatio();
250     }
251 
252     override bool isResizeableHorizontally()
253     {
254         return _sizeConstraints.canResizeHorizontally();
255     }
256 
257     override bool isResizeableVertically()
258     {
259         return _sizeConstraints.canResizeVertically();
260     }
261 
    /// Forwards to `SizeConstraints.getMaxSmallerValidSize`.
    /// Both pointers are in/out: they carry a size in and receive the adjusted size.
    /// The exact adjustment rule is defined by `SizeConstraints`, not here.
    override void getMaxSmallerValidSize(int* inoutWidth, int* inoutHeight)
    {
        _sizeConstraints.getMaxSmallerValidSize(inoutWidth, inoutHeight);
    }
266 
    /// Forwards to `SizeConstraints.getNearestValidSize`.
    /// Both pointers are in/out: they carry a size in and receive the adjusted size.
    /// The exact adjustment rule is defined by `SizeConstraints`, not here.
    override void getNearestValidSize(int* inoutWidth, int* inoutHeight)
    {
        _sizeConstraints.getNearestValidSize(inoutWidth, inoutHeight);
    }
271 
272     override bool nativeWindowResize(int newWidthLogicalPixels, int newHeightLogicalPixels)
273     {
274         // If it's already the same logical size, nothing to do.
275         if ( (newWidthLogicalPixels == _currentLogicalWidth)
276              &&  (newHeightLogicalPixels == _currentLogicalHeight) )
277             return true;
278 
279         // Issue #669.
280         // Can't resize a non-existing window, return failure.
281         // Hosts where this is needed: VST3PluginTestHost
282         // It calls onSize way too soon.
283         if (_window is null)
284             return false;
285 
286         // Here we request the native window to resize.
287         // The actual resize will be received by the window listener, later.
288         return _window.requestResize(newWidthLogicalPixels, newHeightLogicalPixels, false);
289     }
290 
291     // </IGraphics implementation>
292 
293     // This class is only here to avoid name conflicts between
294     // UIElement and IWindowListener methods :|
295     // Explicit outer to avoid emplace crashing
296     static class WindowListener : IWindowListener
297     {
298     nothrow:
299     @nogc:
300         GUIGraphics outer;
301 
302         this(GUIGraphics outer)
303         {
304             this.outer = outer;
305         }
306 
307         override bool onMouseClick(int x, int y, MouseButton mb, bool isDoubleClick, MouseState mstate)
308         {
309             x -= outer._userArea.min.x;
310             y -= outer._userArea.min.y;
311             bool hitSomething = outer.mouseClick(x, y, mb, isDoubleClick, mstate);
312             if (!hitSomething)
313             {
314                 // Nothing was clicked, nothing is focused anymore
315                 outer._uiContext.setFocused(null);
316             }
317             return hitSomething;
318         }
319 
320         override bool onMouseRelease(int x, int y, MouseButton mb, MouseState mstate)
321         {
322             x -= outer._userArea.min.x;
323             y -= outer._userArea.min.y;
324             outer.mouseRelease(x, y, mb, mstate);
325             return true;
326         }
327 
328         override bool onMouseWheel(int x, int y, int wheelDeltaX, int wheelDeltaY, MouseState mstate)
329         {
330             x -= outer._userArea.min.x;
331             y -= outer._userArea.min.y;
332 
333             // Sends the event to the currently dragged element, if any exists.
334             UIElement dragged = outer._uiContext.dragged;
335             if (dragged !is null)
336             {
337                 box2i pos = dragged._position;
338                 if (dragged.onMouseWheel(x - pos.min.x, y - pos.min.y, wheelDeltaX, wheelDeltaY, mstate))
339                     return true;
340             }
341 
342             return outer.mouseWheel(x, y, wheelDeltaX, wheelDeltaY, mstate);
343         }
344 
345         override void onMouseMove(int x, int y, int dx, int dy, MouseState mstate)
346         {
347             x -= outer._userArea.min.x;
348             y -= outer._userArea.min.y;
349             bool hitSomething = outer.mouseMove(x, y, dx, dy, mstate, false);
350             version(legacyMouseDrag)
351             {
352                 if (!hitSomething)
353                 {
354                     // Nothing was mouse-over'ed, nothing is `isMouseOver()` anymore
355                     outer._uiContext.setMouseOver(null);
356                 }
357             }
358         }
359 
360         override void recomputeDirtyAreas()
361         {
362             return outer.recomputeDirtyAreas();
363         }
364 
365         override bool onKeyDown(Key key)
366         {
367             // Sends the event to the last clicked element first
368             if (outer._uiContext.focused !is null)
369                 if (outer._uiContext.focused.onKeyDown(key))
370                     return true;
371 
372             // else to all Elements
373             return outer.keyDown(key);
374         }
375 
376         override bool onKeyUp(Key key)
377         {
378             // Sends the event to the last clicked element first
379             if (outer._uiContext.focused !is null)
380                 if (outer._uiContext.focused.onKeyUp(key))
381                     return true;
382             // else to all Elements
383             return outer.keyUp(key);
384         }
385 
386         /// Returns areas affected by updates.
387         override box2i getDirtyRectangle() nothrow @nogc
388         {
389             box2i r = outer._rectsToResize[].boundingBox();
390 
391             // If _userArea changed recently, mark the whole area as in need of redisplay.
392             if (outer._reportBlackBordersAndResizedAreaAsDirty)
393                 r = r.expand( box2i(0, 0, outer._currentLogicalWidth, outer._currentLogicalHeight) );
394 
395             debug(resizing)
396             {
397                 if (!r.empty)
398                 {
399                     debugLogf("getDirtyRectangle returned rectangle(%d, %d, %d, %d)\n", r.min.x, r.min.y, r.width, r.height);
400                 }
401             }
402 
403             return r;
404         }
405 
406         override ImageRef!RGBA onResized(int width, int height)
407         {
408             return outer.doResize(width, height);
409         }
410 
411         override void onDraw(WindowPixelFormat pf) nothrow @nogc
412         {
413             return outer.doDraw(pf);
414         }
415 
416         override void onMouseCaptureCancelled()
417         {
418             // Stop an eventual drag operation
419             outer._uiContext.stopDragging();
420         }
421 
422         override void onMouseExitedWindow()
423         {
424             // Stop an eventual isMouseOver
425             version(legacyMouseDrag)
426             {
427                 outer._uiContext.setMouseOver(null);
428             }
429             else
430             {
431                 if (outer._uiContext.dragged is null)
432                     outer._uiContext.setMouseOver(null);
433             }
434         }
435 
436         override void onAnimate(double dt, double time)
437         {
438             version(Dplug_ProfileUI) outer.profiler.category("ui").begin("animate");
439             outer.animate(dt, time);
440             version(Dplug_ProfileUI) outer.profiler.end();
441         }
442 
443         override MouseCursor getMouseCursor()
444         {
445             return outer._uiContext.getCurrentMouseCursor();
446         }
447     }
448 
    /// Sets the update margin, the amount of pixels dirty rectangles are extended with.
    /// Tune this to tune the trade-off between light quality and speed.
    /// The default value was tuned by hand on very shiny light sources.
    /// Too high and processing becomes more expensive.
    /// Too little and the light decay doesn't feel natural.
    /// IMPORTANT: This should be called only inside your main reflow() or at UI creation time.
    ///
    /// Params:
    ///     margin = The new margin. Defaults to 20.
    void setUpdateMargin(int margin = 20) nothrow @nogc
    {
        _updateMargin = margin; // theoretically it should dirty every PBR rectangle... hence restricting to reflow().
    }
458 
459 package:
460 
461     // <resizing support>
462 
463     final float getUIScale()
464     {
465         // There is currently no support for this in Dplug, so it is always 1.0f for now.
466         // The OS _might_ upscale the UI without our knowledge though.
467         return 1.0f;
468     }
469 
470     final float getUserScale()
471     {
472         // There is currently no _userArea resize in Dplug, so it is always 1.0f for now.
473         return 1.0f;
474     }
475 
476     final vec2i getDefaultUISizeInPixels()
477     {
478         int w = 0, h = 0;
479         _sizeConstraints.suggestDefaultSize(&w, &h);
480         return vec2i(w, h);
481     }
482 
483     final vec2i getUISizeInPixelsUser()
484     {
485         return vec2i(_currentUserWidth, _currentUserHeight);
486     }
487 
488     final vec2i getUISizeInPixelsLogical()
489     {
490         return vec2i(_currentLogicalWidth, _currentLogicalHeight);
491     }
492 
493     final vec2i getUISizeInPixelsPhysical()
494     {
495         return getUISizeInPixelsLogical(); // no support yet
496     }
497 
    /// Requests a screenshot by atomically raising `_screenShotRequested`.
    /// The flag is consumed by `doDraw`, which then calls `onScreenshot`.
    final void requestUIScreenshot()
    {
        atomicStore(_screenShotRequested, true);
    }
502 
    /// Requests a resize of the UI to the given logical size, involving both the
    /// host (through `_client`) and the native window, in a host-dependent order.
    ///
    /// Params:
    ///     widthLogicalPixels = The requested width, in logical pixels.
    ///     heightLogicalPixels = The requested height, in logical pixels.
    ///
    /// Returns: `true` if the size is already the requested one, or if the requests
    ///          made along the way succeeded (see the body for which result is
    ///          returned in each host-specific case).
    ///
    /// Note: apart from the early exit for an unchanged size, `_client` and `_window`
    ///       are used without a null check here.
    final bool requestUIResize(int widthLogicalPixels,
                               int heightLogicalPixels)
    {
        // The desired size is published first, since the host may query it
        // during the request below.
        _desiredLogicalWidth  = widthLogicalPixels;
        _desiredLogicalHeight = heightLogicalPixels;

        // If it's already the same logical size, nothing to do.
        if ( (widthLogicalPixels == _currentLogicalWidth)
            &&  (heightLogicalPixels == _currentLogicalHeight) )
            return true;

        // Welcome to a very complicated function!
        // Most cases of resize are:
        // 1. ask for resize,
        // 2. then optionally resize window manually
        //
        // However rare cases necessitate to do the reverse.
        bool hostWantsWindowResizeBeforeRequest = false;
        version(OSX) {
            // See Issue #888, this was found as workaround, similar to their .flp format.
            if (_client.getPluginFormat() == PluginFormat.vst2
             && _client.getDAW()          == DAW.FLStudio)
                hostWantsWindowResizeBeforeRequest = true;
        }

        // Note: the client might ask back the plugin size inside this call!
        // Hence why we have the concept of "desired" size.
        bool parentWasResized;
        if (hostWantsWindowResizeBeforeRequest)
            parentWasResized = true; // will ask later
        else
            parentWasResized = _client.requestResize(widthLogicalPixels, heightLogicalPixels);


        // We do not "desire" something else than the current situation, at this point this is in our hands.
        _desiredLogicalWidth  = _currentLogicalWidth;
        _desiredLogicalHeight = _currentLogicalHeight;

        // We are able to early-exit here in VST3.
        // This is because once a VST3 host has resized the parent window, it calls a callback
        // and that leads to `nativeWindowResize` to be called.
        if (parentWasResized && (_client.getPluginFormat() == PluginFormat.vst3))
            return true;

        // Cubase + VST2 + Windows need special treatment to resize parent and grandparent windows manually (Issue #595).
        bool needResizeParentWindow = false;
        version(Windows)
        {
            if (_client.getPluginFormat() == PluginFormat.vst2 && _client.getDAW() == DAW.Cubase)
                needResizeParentWindow = true;
        }

        // In VST2, very few hosts also resize the plugin window. We get to do it manually.

        // Here we request the native window to resize.
        // The actual resize will be received by the window listener, later.
        bool success = _window.requestResize(widthLogicalPixels, heightLogicalPixels, needResizeParentWindow);

        // FL Studio format is different, the host needs to be notified _after_ a manual resize.
        if (success && _client.getPluginFormat() == PluginFormat.flp)
        {
            success = _client.notifyResized;
        }

        // In case the host wanted to be asked afterwards.
        // Note that the result of the window resize above is then not what is returned.
        if (hostWantsWindowResizeBeforeRequest)
           return _client.requestResize(widthLogicalPixels, heightLogicalPixels);

        return success;
    }
573 
574     final void getUINearestValidSize(int* widthLogicalPixels, int* heightLogicalPixels)
575     {
576         // Convert this size to a user width and height
577         int userWidth = cast(int)(0.5f + *widthLogicalPixels * getUserScale());
578         int userHeight = cast(int)(0.5f + *heightLogicalPixels * getUserScale());
579 
580         _sizeConstraints.getNearestValidSize(&userWidth, &userHeight);
581 
582         // Convert back to logical pixels
583         // Note that because of rounding, there might be small problems yet unsolved.
584         *widthLogicalPixels = cast(int)(0.5f + userWidth / getUserScale());
585         *heightLogicalPixels = cast(int)(0.5f + userHeight / getUserScale());
586     }
587 
588     final bool isUIResizable()
589     {
590         // TODO: allow logic resize if internally user area is resampled
591         return _sizeConstraints.isResizable();
592     }
593     // </resizing support>
594 
595 protected:
596 
    // The link to act on the client through the interface.
    // Eventually it may supersede direct usage of the client, or its IHostCommand in UIElements.
    // Only valid between an openUI call and the matching closeUI call (null otherwise).
    IClient _client;

    // The compositor, created once in the constructor through `buildCompositor`.
    ICompositor _compositor;

    // The UI context shared with the UI elements; owned by this object.
    UIContext _uiContext;

    // The object receiving window events on behalf of this object.
    WindowListener _windowListener;

    // An interface to the underlying window.
    // Null whenever the UI is closed.
    IWindow _window;

    // Task pool for multi-threaded image work
    package ThreadPool _threadPool;

    // Size constraints of this UI.
    // Currently can only be a fixed size.
    // NOTE(review): the line above looks outdated, since this class queries
    // `_sizeConstraints.isResizable()` -- confirm and update.
    SizeConstraints _sizeConstraints;

    // The _external_ size in pixels of the plugin interface.
    // This is the size seen by the host/window.
    int _currentLogicalWidth = 0;
    int _currentLogicalHeight = 0;

    // The _desired_ size in pixels of the plugin interface.
    // It is only different from _currentLogicalWidth / _currentLogicalHeight 
    // during a resize (since the host could query the desired size there).
    // Not currently well separated unfortunately, it is seldom used.
    int _desiredLogicalWidth = 0;
    int _desiredLogicalHeight = 0;

    // The _internal_ size in pixels of our UI.
    // This is not the same as the size seen by the window ("logical").
    int _currentUserWidth = 0;
    int _currentUserHeight = 0;

    /// The area, in logical space, where the user area is drawn.
    box2i _userArea;

    // Force userArea refresh on first resize.
    bool _firstResize = true;

    // if true, it means the whole resize buffer and accompanying black
    // borders should be redrawn at the next onDraw
    bool _redrawBlackBordersAndResizedArea;

    // if true, it means the whole resize buffer and accompanying black
    // borders should be reported as dirty at the next recomputeDirtyAreas, and until
    // it is drawn.
    bool _reportBlackBordersAndResizedAreaAsDirty;

    // Diffuse color values for the whole UI.
    Mipmap!RGBA _diffuseMap;

    // Depth values for the whole UI.
    Mipmap!L16 _depthMap;

    // Material values for the whole UI.
    Mipmap!RGBA _materialMap;

    /// The list of areas to be redrawn at the Raw and PBR levels (composited).
    /// These are accumulated over possibly several calls of `recomputeDirtyAreas`
    /// and cleared at the end of `doDraw` (which is what `onDraw` calls).
    /// Other lists of areas are purely derived from `_rectsToUpdateDisjointedRaw`
    /// and `_rectsToUpdateDisjointedPBR`.
    Vec!box2i _rectsToUpdateDisjointedRaw;
    ///ditto
    Vec!box2i _rectsToUpdateDisjointedPBR;

    // Used to maintain the _rectsToUpdateXXX invariant of no overlap
    Vec!box2i _rectsTemp;

    // Same, but temporary variable for mipmap generation
    Vec!box2i[2] _updateRectScratch;

    // The areas that must be effectively re-composited.
    Vec!box2i _rectsToComposite;
    Vec!box2i _rectsToCompositeDisjointed; // same list, but reorganized to avoid overlap
    Vec!box2i _rectsToCompositeDisjointedTiled; // same list, but separated in smaller tiles

    // The areas that must be effectively redisplayed, which also mean the Raw layer is redrawn.
    Vec!box2i _rectsToDisplay;
    Vec!box2i _rectsToDisplayDisjointed; // same list, but reorganized to avoid overlap

    // The areas that must be effectively redisplayed, in logical space (like _userArea).
    Vec!box2i _rectsToResize;
    Vec!box2i _rectsToResizeDisjointed;

    /// The list of UIElement to potentially call `onDrawRaw` on.
    Vec!UIElement _elemsToDrawRaw;

    /// The list of UIElement to potentially call `onDrawPBR` on.
    Vec!UIElement _elemsToDrawPBR;

    /// The scratch buffer used to sort the two above lists.
    Vec!UIElement _sortScratchBuf;

    /// Amount of pixels dirty rectangles are extended with.
    /// Changed through `setUpdateMargin`.
    int _updateMargin = 20;

    /// The composited buffer, before the Raw layer is applied.
    OwnedImage!RGBA _compositedBuffer = null;

    /// The rendered framebuffer.
    /// This is copied from `_compositedBuffer`, then Raw layer is drawn on top.
    /// Components are reordered there.
    /// It must be possible to use a Canvas on it.
    OwnedImage!RGBA _renderedBuffer = null;

    /// The final framebuffer.
    /// It is the only buffer to have a size in logical pixels.
    /// Internally the UI has an "user" size.
    /// FUTURE: resize from user size to logical size using a resizer,
    /// to allow better looking DPI without the OS blurry resizing.
    /// Or to allow higher internal pixel count.
    /// Released in the destructor with `alignedFree(_resizedBuffer, 16)`.
    ubyte* _resizedBuffer = null;

    /// If a screenshot was requested by user widget.
    /// Set by `requestUIScreenshot`, consumed with a compare-and-swap in `doDraw`.
    shared(bool) _screenShotRequested = false;
718 
719     void recomputeDrawLists()
720     {
721         // recompute draw lists
722         _elemsToDrawRaw.clearContents();
723         _elemsToDrawPBR.clearContents();
724         getDrawLists(_elemsToDrawRaw, _elemsToDrawPBR);
725 
726         // Sort by ascending z-order (high z-order gets drawn last)
727         // This sort must be stable to avoid messing with tree natural order.
728         int compareZOrder(in UIElement a, in UIElement b) nothrow @nogc
729         {
730             return a.zOrder() - b.zOrder();
731         }
732         timSort!UIElement(_elemsToDrawRaw[], _sortScratchBuf, &compareZOrder);
733         timSort!UIElement(_elemsToDrawPBR[], _sortScratchBuf, &compareZOrder);
734     }
735 
736     // Useful to convert 16-byte aligned buffer into an ImageRef!RGBA
737     // This was probably still needed because of Issue #693. This was secretly a 
738     // workaround. FUTURE: replace by regular toRef
739     final ImageRef!RGBA toImageRef(ubyte* alignedBuffer, int width, int height)
740     {
741         ImageRef!RGBA ir = void;
742         ir.w = width;
743         ir.h = height;
744         ir.pitch = byteStride(width);
745         ir.pixels = cast(RGBA*)alignedBuffer;
746         return ir;
747     }
748 
749     IProfiler profiler()
750     {
751         return _uiContext.profiler();
752     }
753 
    /// Renders the pending updates, for a window using pixel format `pf`.
    ///
    /// The steps run in this order:
    ///   A. recompute the draw lists,
    ///   B. redraw elements at the PBR level,
    ///   C. regenerate mipmaps,
    ///   D. composite into `_compositedBuffer`,
    ///   E. copy every rectangle of `_rectsToDisplayDisjointed` from
    ///      `_compositedBuffer` to `_renderedBuffer`,
    ///   F. redraw elements at the Raw level,
    ///   G. reorder components for `pf`, then call `onScreenshot` if a screenshot
    ///      was requested,
    ///   H. copy the updated content to the final buffer.
    /// Finally the pending PBR and Raw update lists are cleared.
    ///
    /// With `version(Dplug_ProfileUI)`, each step is wrapped in a profiler span.
    void doDraw(WindowPixelFormat pf) nothrow @nogc
    {
        version(Dplug_ProfileUI) profiler.category("ui").begin("doDraw");

        debug(resizing) debugLogf(">doDraw\n");

        debug(resizing)
        {
            foreach(r; _rectsToUpdateDisjointedPBR[])
            {
                debugLogf("  * this will redraw PBR rectangle(%d, %d, %d, %d)\n", r.min.x, r.min.y, r.width, r.height);
            }
            foreach(r; _rectsToUpdateDisjointedRaw[])
            {
                debugLogf("  * this will redraw RAW rectangle(%d, %d, %d, %d)\n", r.min.x, r.min.y, r.width, r.height);
            }
        }

        // A. Recompute draw lists
        // These are the `UIElement`s that _may_ have their onDrawXXX callbacks called.

        version(Dplug_ProfileUI) profiler.begin("Recompute Draw Lists");
        recomputeDrawLists();
        version(Dplug_ProfileUI) profiler.end();

        // Composite GUI
        // Most of the cost of rendering is here
        // B. 1st PASS OF REDRAW
        // Some UIElements are redrawn at the PBR level
        version(Dplug_ProfileUI) profiler.begin("Draw Elements PBR");
        redrawElementsPBR();
        version(Dplug_ProfileUI) profiler.end();

        // C. MIPMAPPING
        version(Dplug_ProfileUI) profiler.begin("Regenerate Mipmaps");
        regenerateMipmaps();
        version(Dplug_ProfileUI) profiler.end();

        // D. COMPOSITING
        auto compositedRef = _compositedBuffer.toRef();

        version(Dplug_ProfileUI) profiler.begin("Composite GUI");
        compositeGUI(compositedRef); // Launch the possibly-expensive Compositor step, which implements PBR rendering
        version(Dplug_ProfileUI) profiler.end();

        // E. COPY FROM "COMPOSITED" TO "RENDERED" BUFFER
        // Copy _compositedBuffer onto _renderedBuffer for every rect that will be changed on display
        auto renderedRef = _renderedBuffer.toRef();
        version(Dplug_ProfileUI) profiler.begin("Copy to renderbuffer");
        foreach(rect; _rectsToDisplayDisjointed[])
        {
            auto croppedComposite = compositedRef.cropImageRef(rect);
            auto croppedRendered = renderedRef.cropImageRef(rect);
            croppedComposite.blitTo(croppedRendered); // failure to optimize this: 1
        }
        version(Dplug_ProfileUI) profiler.end();
        
        // F. 2nd PASS OF REDRAW
        // Some UIElements are redrawn at the Raw level, on top of the rendered buffer.
        version(Dplug_ProfileUI) profiler.begin("Draw Elements Raw");
        redrawElementsRaw();
        version(Dplug_ProfileUI) profiler.end();

        // G. Reorder components to the right pixel format
        version(Dplug_ProfileUI) profiler.begin("Component Reorder");
        reorderComponents(pf);
        version(Dplug_ProfileUI) profiler.end();

        // G.bis
        // We have a render.
        // Eventually make a screenshot here, if one was requested asynchronously.
        // The compare-and-swap consumes the request, so one request gives one screenshot.
        if (cas(&_screenShotRequested, true, false))
        {
            onScreenshot(_renderedBuffer.toRef(), 
                         pf, 
                         _diffuseMap.levels[0].toRef,
                         _depthMap.levels[0].toRef,
                         _materialMap.levels[0].toRef);
        }

        // H. Copy updated content to the final buffer. (hint: not actually resizing)
        version(Dplug_ProfileUI) profiler.begin("Copy content");
        resizeContent(pf);
        version(Dplug_ProfileUI) profiler.end();

        // Only then is the list of rectangles to update cleared,
        // before calling `doDraw` such work accumulates
        _rectsToUpdateDisjointedPBR.clearContents();
        _rectsToUpdateDisjointedRaw.clearContents();

        // Closes the "doDraw" profiler span opened at the top.
        version(Dplug_ProfileUI) profiler.end();
        debug(resizing) debugLogf("<doDraw\n");
    }
846 
847     void recomputeDirtyAreas() nothrow @nogc
848     {
849         // First we pull dirty rectangles from the UI, for the PBR and Raw layers
850         // Note that there is indeed a race here (the same UIElement could have pushed rectangles in both
851         // at around the same time), but that isn't a problem.
852         context().dirtyListRaw.pullAllRectangles(_rectsToUpdateDisjointedRaw);
853         context().dirtyListPBR.pullAllRectangles(_rectsToUpdateDisjointedPBR);
854 
855         recomputePurelyDerivedRectangles();
856     }
857 
    /// Rebuilds every rectangle list that is derived from the two pending
    /// update lists, `_rectsToUpdateDisjointedRaw` and `_rectsToUpdateDisjointedPBR`.
    ///
    /// Steps, in order:
    ///   1. Clip both pending lists to the current user size.
    ///   2. Make each pending list disjointed again.
    ///   3. Derive `_rectsToComposite` / `_rectsToDisplay` and their disjointed versions.
    ///   4. Derive `_rectsToResize` / `_rectsToResizeDisjointed`, in logical coordinates.
    ///
    /// May also raise `_redrawBlackBordersAndResizedArea`.
    void recomputePurelyDerivedRectangles()
    {
        // If a resize has been made recently, we need to clip rectangles
        // in the pending lists to the new size.
        // All other rectangles are purely derived from those.
        // PERF: this check is necessary because of #597.
        //       Solving this is a long-term quest in itself.
        box2i validUserArea = rectangle(0, 0, _currentUserWidth, _currentUserHeight);
        foreach (ref r; _rectsToUpdateDisjointedRaw[])
        {
            r = r.intersection(validUserArea);
        }
        foreach (ref r; _rectsToUpdateDisjointedPBR[])
        {
            r = r.intersection(validUserArea);
        }

        // The problem here is that if the window isn't shown there may be duplicates in
        // _rectsToUpdateDisjointedRaw and _rectsToUpdateDisjointedPBR
        // (`recomputeDirtyAreas` called multiple times without clearing those arrays),
        // so we have to maintain unicity again.
        // Duplicates can also accumulate in case of two successive onResize
        // (to test: Studio One with a continuously resizing plugin).
        //
        // PERF: when the window is shown, we could overwrite the content of
        //       _rectsToUpdateDisjointedRaw/_rectsToUpdateDisjointedPBR instead of doing that.
        //
        // NOTE(review): the clipping above can leave empty boxes in the lists, while the
        // loops below assert `!rect.empty`; presumably `removeOverlappingAreas` drops
        // empty boxes -- confirm against dplug.gui.boxlist.
        {
            // Make _rectsToUpdateDisjointedRaw disjointed, going through _rectsTemp
            _rectsTemp.clearContents();
            removeOverlappingAreas(_rectsToUpdateDisjointedRaw, _rectsTemp);
            _rectsToUpdateDisjointedRaw.clearContents();
            _rectsToUpdateDisjointedRaw.pushBack(_rectsTemp);
            assert(haveNoOverlap(_rectsToUpdateDisjointedRaw[]));

            // Make _rectsToUpdateDisjointedPBR disjointed, going through _rectsTemp
            _rectsTemp.clearContents();
            removeOverlappingAreas(_rectsToUpdateDisjointedPBR, _rectsTemp);
            _rectsToUpdateDisjointedPBR.clearContents();
            _rectsToUpdateDisjointedPBR.pushBack(_rectsTemp);
            assert(haveNoOverlap(_rectsToUpdateDisjointedPBR[]));
        }

        // Compute _rectsToComposite and _rectsToDisplay, purely derived from the above.
        // Note that they are possibly overlapping collections
        // _rectsToComposite <- margin(_rectsToUpdateDisjointedPBR)
        // _rectsToDisplay <- union(_rectsToComposite, _rectsToUpdateDisjointedRaw)
        {
            _rectsToComposite.clearContents();
            foreach(rect; _rectsToUpdateDisjointedPBR)
            {
                assert(rect.isSorted);
                assert(!rect.empty);
                _rectsToComposite.pushBack( convertPBRLayerRectToRawLayerRect(rect, _currentUserWidth, _currentUserHeight) );
            }

            // Compute the non-overlapping version
            _rectsToCompositeDisjointed.clearContents();
            removeOverlappingAreas(_rectsToComposite, _rectsToCompositeDisjointed);

            // Everything that gets composited must be displayed, plus the Raw-only updates
            _rectsToDisplay.clearContents();
            _rectsToDisplay.pushBack(_rectsToComposite);
            foreach(rect; _rectsToUpdateDisjointedRaw)
            {
                assert(rect.isSorted);
                assert(!rect.empty);
                _rectsToDisplay.pushBack( rect );
            }

            // Compute the non-overlapping version
            _rectsToDisplayDisjointed.clearContents();
            removeOverlappingAreas(_rectsToDisplay, _rectsToDisplayDisjointed);
        }

        // Compute _rectsToResize and _rectsToResizeDisjointed, the areas where resized content
        // is written (in the logical pixel area).
        // These rectangles are constrained to update only _userArea.
        {
            _rectsToResize.clearContents();
            foreach(rect; _rectsToDisplay[])
            {
                // Move from user coordinates to logical coordinates, then clip to _userArea
                box2i r = convertUserRectToLogicalRect(rect).intersection(_userArea);
                _rectsToResize.pushBack(r);
            }

            if (_reportBlackBordersAndResizedAreaAsDirty)
            {
                // Redraw whole resized zone and black borders on next draw, as this will
                // be reported to the OS as being repainted.
                _redrawBlackBordersAndResizedArea = true;
            }
            _rectsToResizeDisjointed.clearContents();
            removeOverlappingAreas(_rectsToResize, _rectsToResizeDisjointed);

            // All those rectangles should be strictly in _userArea
            foreach(r; _rectsToResizeDisjointed)
                assert(_userArea.contains(r));
        }
    }
954 
955     final box2i convertPBRLayerRectToRawLayerRect(box2i rect, int width, int height) nothrow @nogc
956     {
957         int xmin = rect.min.x - _updateMargin;
958         int ymin = rect.min.y - _updateMargin;
959         int xmax = rect.max.x + _updateMargin;
960         int ymax = rect.max.y + _updateMargin;
961 
962         if (xmin < 0) xmin = 0;
963         if (ymin < 0) ymin = 0;
964         if (xmax > width) xmax = width;
965         if (ymax > height) ymax = height;
966 
967         // This could also happen if an UIElement is moved quickly
968         if (xmax < 0) xmax = 0;
969         if (ymax < 0) ymax = 0;
970         if (xmin > width) xmin = width;
971         if (ymin > height) ymin = height;
972 
973         box2i result = box2i(xmin, ymin, xmax, ymax);
974         assert(result.isSorted);
975         return result;
976     }
977 
    /// Reacts to a new window client size, given in logical pixels.
    ///
    /// Records the new logical size, asks `_sizeConstraints` for a matching user size,
    /// recomputes `_userArea` inside the logical area, schedules redraws as needed,
    /// then resizes the compositor, the mipmaps, the intermediate buffers and
    /// `_resizedBuffer`.
    ///
    /// Params:
    ///     widthLogicalPixels  = new width of the logical area.
    ///     heightLogicalPixels = new height of the logical area.
    /// Returns: an image reference built from `_resizedBuffer` and the logical dimensions.
    ImageRef!RGBA doResize(int widthLogicalPixels,
                           int heightLogicalPixels) nothrow @nogc
    {
        version(Dplug_ProfileUI) profiler.category("ui").begin("doResize");
        debug(resizing) debugLogf(">doResize(%d, %d)\n", widthLogicalPixels, heightLogicalPixels);

        // We do receive a new size in logical pixels.
        // This is coming from getting the window client area. The reason
        // for this resize doesn't matter, we must find a mapping that fits
        // between this given logical size and user size.

        // 1.a Based upon the _sizeConstraints, select a user size in pixels.
        //     Keep in mind if the _userArea has just moved (just moving the contents elsewhere)
        //     or if its size has changed (user size), which requires a redraw.

        // Has the logical available size changed?
        bool logicalSizeChanged = false;
        if (_currentLogicalWidth != widthLogicalPixels)
        {
            _currentLogicalWidth = widthLogicalPixels;
            logicalSizeChanged = true;
        }
        if (_currentLogicalHeight != heightLogicalPixels)
        {
            _currentLogicalHeight = heightLogicalPixels;
            logicalSizeChanged = true;
        }

        // Start from the logical size and let the size constraints adjust it in place.
        // NOTE(review): by its name, getMaxSmallerValidSize presumably never grows the
        // values it is given -- confirm in dplug.gui.sizeconstraints.
        int newUserWidth = widthLogicalPixels;
        int newUserHeight = heightLogicalPixels;
        _sizeConstraints.getMaxSmallerValidSize(&newUserWidth, &newUserHeight);

        // Has the user size changed?
        bool userSizeChanged = false;
        if (_currentUserWidth != newUserWidth)
        {
            _currentUserWidth = newUserWidth;
            userSizeChanged = true;
        }
        if (_currentUserHeight != newUserHeight)
        {
            _currentUserHeight = newUserHeight;
            userSizeChanged = true;
        }

        // On first onResize, assume both sizes changed
        if (_firstResize)
        {
            logicalSizeChanged = true;
            userSizeChanged = true;
            _firstResize = false;
        }

        // A user size change is only expected together with a logical size change.
        if (userSizeChanged) { assert(logicalSizeChanged); }

        // 1.b Update user area rect. We find a suitable space in logical area
        //     to draw the whole UI.
        if (logicalSizeChanged)
        {
            int x, y, w, h;
            if (_currentLogicalWidth >= _currentUserWidth)
            {
                // Enough horizontal room: center the user area
                x = (_currentLogicalWidth - _currentUserWidth) / 2;
                w = _currentUserWidth;
            }
            else
            {
                // Not enough horizontal room: the user area spans the whole logical width
                x = 0;
                w = _currentLogicalWidth;
            }
            if (_currentLogicalHeight >= _currentUserHeight)
            {
                // Enough vertical room: center the user area
                y = (_currentLogicalHeight - _currentUserHeight) / 2;
                h = _currentUserHeight;
            }
            else
            {
                // Not enough vertical room: the user area spans the whole logical height
                y = 0;
                h = _currentLogicalHeight;
            }

            _userArea = box2i.rectangle(x, y, w, h);

            debug(resizing)
            {
                debugLogf("new _userArea is rectangle(%d, %d, %d, %d)\n", x, y, w, h);
            }

            _reportBlackBordersAndResizedAreaAsDirty = true;

            // Note: out of range rectangles will still be in the dirtyListRaw/dirtyListPBR
            // and also _rectsToUpdateDisjointedPBR/_rectsToUpdateDisjointedRaw
            // This is the dreaded Issue #597
            // Unicity and boundness are maintained inside recomputePurelyDerivedRectangles().

            // The user size has changed. Force an immediate full redraw, so that no ancient data is used.
            // Note that this is on top of previous resizes or pulled rectangles in
            // _rectsToUpdateDisjointedPBR / _rectsToUpdateDisjointedRaw.
            if (userSizeChanged)
            {
                debug(resizing) debugLogf("  * provoke full redraw\n");
                _rectsToUpdateDisjointedPBR.pushBack( rectangle(0, 0, _userArea.width, _userArea.height) );
            }

            // This avoids an onDraw with wrong rectangles
            recomputePurelyDerivedRectangles();
        }

        // 2. Invalidate UI region if user size change.
        //    Note: _resizedBuffer invalidation is managed with flags instead of this.
        position = box2i(0, 0, _currentUserWidth, _currentUserHeight);

        // 3. Resize compositor buffers.
        _compositor.resizeBuffers(_currentUserWidth, _currentUserHeight, PBR_TILE_MAX_WIDTH, PBR_TILE_MAX_HEIGHT);

        _diffuseMap.size(5, _currentUserWidth, _currentUserHeight);
        _depthMap.size(4, _currentUserWidth, _currentUserHeight);

        // The first level of the depth map has a border of 1 pixel and 2 trailing pixels on the right,
        // to simplify some PBR passes
        int border_1 = 1;
        int rowAlign_1 = 1;
        int xMultiplicity_1 = 1;
        int trailingSamples_2 = 2;
        _depthMap.levels[0].size(_currentUserWidth, _currentUserHeight, border_1, rowAlign_1, xMultiplicity_1, trailingSamples_2);

        _materialMap.size(0, _currentUserWidth, _currentUserHeight);

        // Extends buffers with user size

        int border_0 = 0;
        int rowAlign_16 = 16;
        int trailingSamples_0 = 0;
        int trailingSamples_3 = 3;
        _compositedBuffer.size(_currentUserWidth, _currentUserHeight, border_0, rowAlign_16, xMultiplicity_1, trailingSamples_0);
        _renderedBuffer.size(_currentUserWidth, _currentUserHeight, border_0, rowAlign_16, xMultiplicity_1, trailingSamples_3);

        // Extends final buffer with logical size
        //
        // Why one line more with the +1? This is to fix Issue #741 and all other related macOS bugs.
        // This works around an Apple bug that caused a lot of crashes between Nov 2022 and Jan 2024.
        size_t sizeNeeded = byteStride(_currentLogicalWidth) * (_currentLogicalHeight + 1);


        _resizedBuffer = cast(ubyte*) alignedRealloc(_resizedBuffer, sizeNeeded, 16);

        debug(resizing) debugLogf("<doResize(%d, %d)\n", widthLogicalPixels, heightLogicalPixels);

        version(Dplug_ProfileUI) profiler.end();

        return toImageRef(_resizedBuffer, _currentLogicalWidth, _currentLogicalHeight);
    }
1128 
1129     /// Draw the Raw layer of `UIElement` widgets
1130     void redrawElementsRaw() nothrow @nogc
1131     {
1132         enum bool parallelDraw = true;
1133 
1134         ImageRef!RGBA renderedRef = _renderedBuffer.toRef();
1135 
1136         // No need to launch threads only to have them realize there isn't anything to do
1137         if (_rectsToDisplayDisjointed.length == 0)
1138             return;
1139 
1140         static if (parallelDraw)
1141         {
1142             int drawn = 0;
1143             int N = cast(int)_elemsToDrawRaw.length;
1144 
1145             while(drawn < N)
1146             {
1147                 // See: redrawElementsPBR below for a remark on performance there.
1148 
1149                 int canBeDrawn = 1; // at least one can be drawn without collision
1150 
1151                 // Does this first widget in the FIFO wants to be draw alone?
1152                 if (! _elemsToDrawRaw[drawn].isDrawAloneRaw())
1153                 {
1154                     // Search max number of parallelizable draws until the end of the list or a collision is found
1155                     bool foundIntersection = false;
1156                     for ( ; (drawn + canBeDrawn < N); ++canBeDrawn)
1157                     {
1158                         // Should we include this element to the assembled set of widgets to draw?
1159                         UIElement candidateWidget = _elemsToDrawRaw[drawn + canBeDrawn];
1160 
1161                         if (candidateWidget.isDrawAloneRaw())
1162                             break; // wants to be drawn alone
1163 
1164                         box2i candidatePos = candidateWidget.position;
1165 
1166                         for (int j = 0; j < canBeDrawn; ++j) // PERF: aaaand this is nicely quadratic
1167                         {
1168                             if (_elemsToDrawRaw[drawn + j].position.intersects(candidatePos))
1169                             {
1170                                 foundIntersection = true;
1171                                 break;
1172                             }
1173                         }
1174                         if (foundIntersection)
1175                             break;
1176                     }
1177                 }
1178 
1179                 assert(canBeDrawn >= 1);
1180 
1181                 // Draw a number of UIElement in parallel
1182                 void drawOneItem(int i, int threadIndex) nothrow @nogc
1183                 {
1184                     version(Dplug_ProfileUI) 
1185                     {
1186                         char[maxUIElementIDLength + 16] idstr;
1187                         snprintf(idstr.ptr, 128, 
1188                                  "draw Raw element %s".ptr, _elemsToDrawRaw[drawn + i].getId().ptr);
1189                         profiler.category("draw").begin(idstr);
1190                     }
1191 
1192                     _elemsToDrawRaw[drawn + i].renderRaw(renderedRef, _rectsToDisplayDisjointed[]);
1193 
1194                     version(Dplug_ProfileUI) profiler.end();
1195                 }
1196                 _threadPool.parallelFor(canBeDrawn, &drawOneItem);
1197 
1198                 drawn += canBeDrawn;
1199                 assert(drawn <= N);
1200             }
1201             assert(drawn == N);
1202         }
1203         else
1204         {
1205             foreach(elem; _elemsToDrawRaw)
1206                 elem.renderRaw(renderedRef, _rectsToDisplayDisjointed[]);
1207         }
1208     }
1209 
1210     /// Draw the PBR layer of `UIElement` widgets
1211     void redrawElementsPBR() nothrow @nogc
1212     {
1213         enum bool parallelDraw = true;
1214 
1215         assert(_diffuseMap.levels[0] !is null);
1216         assert(_depthMap.levels[0] !is null);
1217         assert(_materialMap.levels[0] !is null);
1218         auto diffuseRef = _diffuseMap.levels[0].toRef();
1219         auto depthRef = _depthMap.levels[0].toRef();
1220         auto materialRef = _materialMap.levels[0].toRef();
1221 
1222         // No need to launch threads only to have them realize there isn't anything to do
1223         if (_rectsToUpdateDisjointedPBR.length == 0)
1224             return;
1225 
1226         static if (parallelDraw)
1227         {
1228             int drawn = 0;
1229             int N = cast(int)_elemsToDrawPBR.length;
1230 
1231             while(drawn < N)
1232             {
1233                 // <Scheduling remark>
1234                 // PERF: scheduling here is not entirely optimal: consecutive overalapping widgets 
1235                 // would block further parallel draw if the next widget doesn't overlap the other two.
1236                 //
1237                 //  ________          _____
1238                 //  |      |          |   |
1239                 //  |  B   |______    | C |      <---- Will not draw A and C in parallel if
1240                 //  |______|     |    |___|            Z(A) < Z(B) < Z(C)
1241                 //      |    A   |
1242                 //      |________|
1243                 //
1244                 // PERF: to go further, could use the disjointed rects to draw even more in parallel. 
1245                 // Real updated graphics is intersection(position, union(_rectsToUpdateDisjointedPBR)),
1246                 // not simply the widget position.
1247                 // </Scheduling remark>
1248 
1249                 int canBeDrawn = 1; // at least one can be drawn without collision
1250 
1251                 // Does this first widget in the FIFO wants to be draw alone?
1252                 if (! _elemsToDrawPBR[drawn].isDrawAlonePBR())
1253                 {
1254                     // Search max number of parallelizable draws until the end of the list or a collision is found
1255                     bool foundIntersection = false;
1256                     for ( ; (drawn + canBeDrawn < N); ++canBeDrawn)
1257                     {
1258                         // Should we include this element to the assembled set of widgets to draw?
1259                         UIElement candidateWidget = _elemsToDrawPBR[drawn + canBeDrawn];
1260 
1261                         if (candidateWidget.isDrawAlonePBR())
1262                             break; // wants to be drawn alone
1263 
1264                         box2i candidatePos = _elemsToDrawPBR[drawn + canBeDrawn].position;
1265 
1266                         for (int j = 0; j < canBeDrawn; ++j) // check with each former selected widget, PERF quadratic
1267                         {
1268                             if (_elemsToDrawPBR[drawn + j].position.intersects(candidatePos))
1269                             {
1270                                 foundIntersection = true;
1271                                 break;
1272                             }
1273                         }
1274                         if (foundIntersection)
1275                             break;
1276                     }
1277                 }
1278 
1279                 assert(canBeDrawn >= 1);
1280 
1281                 // Draw a number of UIElement in parallel
1282                 void drawOneItem(int i, int threadIndex) nothrow @nogc
1283                 {
1284                     version(Dplug_ProfileUI) 
1285                     {
1286                         char[maxUIElementIDLength + 16] idstr;
1287                         snprintf(idstr.ptr, 128, 
1288                                  "draw PBR element %s", _elemsToDrawPBR[drawn + i].getId().ptr);
1289                         profiler.category("draw").begin(idstr);
1290                     }
1291 
1292                     _elemsToDrawPBR[drawn + i].renderPBR(diffuseRef, depthRef, materialRef, _rectsToUpdateDisjointedPBR[]);
1293 
1294                     version(Dplug_ProfileUI) profiler.end();
1295                 }
1296                 _threadPool.parallelFor(canBeDrawn, &drawOneItem);
1297 
1298                 drawn += canBeDrawn;
1299                 assert(drawn <= N);
1300             }
1301             assert(drawn == N);
1302         }
1303         else
1304         {
1305             // Render required areas in diffuse and depth maps, base level
1306             foreach(elem; _elemsToDraw)
1307                 elem.renderPBR(diffuseRef, depthRef, materialRef, _rectsToUpdateDisjointedPBR[]);
1308         }
1309     }
1310 
1311     /// Do the PBR compositing step. This is the most expensive step in the UI.
1312     void compositeGUI(ImageRef!RGBA wfb) nothrow @nogc
1313     {
1314         _rectsToCompositeDisjointedTiled.clearContents();
1315         tileAreas(_rectsToCompositeDisjointed[],  PBR_TILE_MAX_WIDTH, PBR_TILE_MAX_HEIGHT, _rectsToCompositeDisjointedTiled);
1316 
1317         _compositor.compositeTile(wfb,
1318                                   _rectsToCompositeDisjointedTiled[],
1319                                   _diffuseMap,
1320                                   _materialMap,
1321                                   _depthMap,
1322                                   profiler());
1323     }
1324 
1325     /// Compose lighting effects from depth and diffuse into a result.
1326     /// takes output image and non-overlapping areas as input
1327     /// Useful multithreading code.
1328     void regenerateMipmaps() nothrow @nogc
1329     {
1330         int numAreas = cast(int)_rectsToUpdateDisjointedPBR.length;
1331 
1332         // No mipmap to update, no need to launch threads
1333         if (numAreas == 0)
1334             return;
1335 
1336         // Fill update rect buffer with the content of _rectsToUpdateDisjointedPBR
1337         for (int i = 0; i < 2; ++i)
1338         {
1339             _updateRectScratch[i].clearContents();
1340             _updateRectScratch[i].pushBack(_rectsToUpdateDisjointedPBR[]);
1341         }
1342 
1343         // Mipmapping used to be threaded, however because it's completely memory-bound
1344         // (about 2mb read/sec) and fast enough, it's not worth launching threads for.
1345 
1346         version(Dplug_ProfileUI) profiler.category("mipmap").begin("diffuse mipmap");
1347 
1348         // Generate diffuse mipmap, useful for dealing with emissive
1349         {
1350             // diffuse
1351             Mipmap!RGBA mipmap = _diffuseMap;
1352             foreach(level; 1 .. mipmap.numLevels())
1353             {
1354                 Mipmap!RGBA.Quality quality;
1355                 if (level == 1)
1356                     quality = Mipmap!RGBA.Quality.boxAlphaCovIntoPremul;
1357                 else
1358                     quality = Mipmap!RGBA.Quality.cubic;
1359                 foreach(ref area; _updateRectScratch[0])
1360                 {
1361                     // Note: the rects might be disjointed, but each leveling up makes them
1362                     // Possibly overlapping. It is assumed the cost is minor.
1363                     // Some pixels in higher mipmap levels might be computed several times.
1364                     area = mipmap.generateNextLevel(quality, area, level);
1365                 }
1366             }
1367         }
1368 
1369         version(Dplug_ProfileUI) profiler.end;
1370 
1371         version(Dplug_ProfileUI) profiler.begin("depth mipmap");
1372 
1373         // Generate depth mipmap, useful for dealing with ambient occlusion
1374         {
1375             int W = _currentUserWidth;
1376             int H = _currentUserHeight;
1377 
1378             // Depth is special since it has a border!
1379             // Regenerate the border area that needs to be regenerated
1380             OwnedImage!L16 level0 = _depthMap.levels[0];
1381             foreach(box2i area; _updateRectScratch[1])
1382                 level0.replicateBordersTouching(area);
1383 
1384             // DEPTH MIPMAPPING
1385             Mipmap!L16 mipmap = _depthMap;
1386             foreach(level; 1 .. mipmap.numLevels())
1387             {
1388                 auto quality = level >= 3 ? Mipmap!L16.Quality.cubic : Mipmap!L16.Quality.box;
1389                 foreach(ref area; _updateRectScratch[1])
1390                 {
1391                     area = mipmap.generateNextLevel(quality, area, level);
1392                 }
1393             }
1394         }
1395 
1396         version(Dplug_ProfileUI) profiler.end;
1397     }
1398 
1399     void reorderComponents(WindowPixelFormat pf)
1400     {
1401         auto renderedRef = _renderedBuffer.toRef();
1402 
1403         final switch(pf)
1404         {
1405             case WindowPixelFormat.RGBA8:
1406                 foreach(rect; _rectsToDisplayDisjointed[])
1407                 {
1408                     shuffleComponentsRGBA8ToRGBA8AndForceAlphaTo255(renderedRef.cropImageRef(rect));
1409                 }
1410                 break;
1411 
1412             case WindowPixelFormat.BGRA8:
1413                 foreach(rect; _rectsToDisplayDisjointed[])
1414                 {
1415                     shuffleComponentsRGBA8ToBGRA8AndForceAlphaTo255(renderedRef.cropImageRef(rect));
1416                 }
1417                 break;
1418 
1419             case WindowPixelFormat.ARGB8:
1420                 foreach(rect; _rectsToDisplayDisjointed[])
1421                 {
1422                     shuffleComponentsRGBA8ToARGB8AndForceAlphaTo255(renderedRef.cropImageRef(rect));
1423                 }
1424                 break;
1425         }
1426     }
1427 
1428     // From a user area rectangle, return a logical are rectangle with the same size.
1429     final box2i convertUserRectToLogicalRect(box2i b)
1430     {
1431         return b.translate(_userArea.min);
1432     }
1433 
1434     final box2i convertLogicalRectToUserRect(box2i b)
1435     {
1436         return b.translate(-_userArea.min);
1437     }
1438 
1439     void resizeContent(WindowPixelFormat pf)
1440     {
1441         // TODO: eventually resize?
1442         // For now what we do for logical area is crop and offset.
1443         // In the future, could be beneficial to resample if needed.
1444 
1445         auto renderedRef = _renderedBuffer.toRef();
1446         auto resizedRef = toImageRef(_resizedBuffer, _currentLogicalWidth, _currentLogicalHeight);
1447 
1448         box2i[] rectsToCopy = _rectsToResizeDisjointed[];
1449 
1450         // If invalidated, the whole buffer needs to be redrawn
1451         // (because of borders, or changing offsets of the user area).
1452         if (_redrawBlackBordersAndResizedArea)
1453         {
1454             debug(resizing) debugLogf("  * redrawing black borders, and copy item\n");
1455             RGBA black;
1456             final switch(pf)
1457             {
1458                 case WindowPixelFormat.RGBA8:
1459                 case WindowPixelFormat.BGRA8: black = RGBA(0, 0, 0, 255); break;
1460                 case WindowPixelFormat.ARGB8: black = RGBA(255, 0, 0, 0); break;
1461             }
1462             resizedRef.fillAll(black); // PERF: Only do this in the location of the black border.
1463 
1464             // No need to report that everything is dirty anymore.
1465             _reportBlackBordersAndResizedAreaAsDirty = false;
1466 
1467             // and no need to draw everything in onDraw anymore.
1468             _redrawBlackBordersAndResizedArea = false;
1469 
1470             rectsToCopy = (&_userArea)[0..1];
1471         }
1472 
1473         foreach(rect; rectsToCopy[])
1474         {
1475             int dx = _userArea.min.x;
1476             int dy = _userArea.min.y;
1477 
1478             for (int j = rect.min.y; j < rect.max.y; ++j)
1479             {
1480                 RGBA* src  = renderedRef.scanline(j - dy).ptr;
1481                 RGBA* dest = resizedRef.scanline(j).ptr;
1482                 dest[rect.min.x..rect.max.x] = src[(rect.min.x - dx)..(rect.max.x - dx)];
1483             }
1484         }
1485     }
1486 }
1487 
1488 
// Given a width in pixels, returns how long scanlines should be, in bytes, for the
// final output buffer: 4 bytes per pixel, rounded up to a multiple of 4 bytes.
// Note: it seems win32 needs this exact stride for the returned buffer. It mimics BMP.
//       On the other hand, it seems other platforms don't have the same constraints with row pitch.
int byteStride(int width) pure nothrow @nogc
{
    // See https://github.com/AuburnSounds/Dplug/issues/563, there
    // is currently a coupling with dplug:window and this can't be changed.
    enum int bytesPerPixel = 4;
    enum int alignment = 4;
    enum int alignMask = alignment - 1;

    immutable int rawBytes = width * bytesPerPixel;
    immutable int padded = rawBytes + alignMask;
    return padded & ~alignMask;
}
1500 
/// Converts every pixel of `image`, in place, from R,G,B,A byte order to
/// A,R,G,B byte order. The alpha byte written is always 255, whatever the
/// input alpha was.
void shuffleComponentsRGBA8ToARGB8AndForceAlphaTo255(ImageRef!RGBA image) pure nothrow @nogc
{
    immutable int width = image.w;
    immutable int height = image.h;
    for (int row = 0; row < height; ++row)
    {
        ubyte* bytes = cast(ubyte*)image.scanline(row).ptr;

        // Vector path: 4 pixels (16 bytes) per iteration, unaligned load and store.
        int px = 0;
        while (px + 3 < width)
        {
            __m128i* quad = cast(__m128i*)(&bytes[4*px]);

            // Force the 4th byte of each pixel (alpha) to 0xff before moving it first
            __m128i opaque = _mm_or_si128(_mm_loadu_si128(quad), _mm_set1_epi32(0xff000000));

            version(LDC)
            {
                import ldc.intrinsics;
                import ldc.simd;

                // Within each pixel: byte 3 goes first, followed by bytes 0, 1, 2
                __m128i rotated = cast(__m128i) shufflevector!(byte16,  3,  0,  1,  2,
                                                                        7,  4,  5,  6,
                                                                       11,  8,  9, 10,
                                                                       15, 12, 13, 14)(cast(byte16)opaque, cast(byte16)opaque);
                _mm_storeu_si128(quad, rotated);
            }
            else
            {
                // Widen bytes to 16-bit lanes, rotate lanes within each pixel
                // (lane 3 first, then lanes 0, 1, 2), then narrow back to bytes.
                enum int alphaFirst = _MM_SHUFFLE(2, 1, 0, 3);
                __m128i zero = _mm_setzero_si128();
                __m128i lowPixels = _mm_unpacklo_epi8(opaque, zero);
                __m128i highPixels = _mm_unpackhi_epi8(opaque, zero);

                lowPixels = _mm_shufflelo_epi16!alphaFirst(_mm_shufflehi_epi16!alphaFirst(lowPixels));
                highPixels = _mm_shufflelo_epi16!alphaFirst(_mm_shufflehi_epi16!alphaFirst(highPixels));

                _mm_storeu_si128(quad, _mm_packus_epi16(lowPixels, highPixels));
            }

            px += 4;
        }

        // Scalar path for the 0 to 3 remaining pixels of the row
        for(; px < width; ++px)
        {
            ubyte* p = bytes + 4*px;
            immutable ubyte r = p[0];
            immutable ubyte g = p[1];
            immutable ubyte b = p[2];
            p[0] = 255;
            p[1] = r;
            p[2] = g;
            p[3] = b;
        }
    }
}
1552 
/// Rewrites every pixel of `image` in place: the bytes stored as R, G, B, A
/// become B, G, R, 255. The previous alpha value is discarded.
/// Pixels are processed four at a time with SSE2 intrinsics, the remaining
/// (width % 4) pixels of each row go through a scalar loop.
void shuffleComponentsRGBA8ToBGRA8AndForceAlphaTo255(ImageRef!RGBA image) pure nothrow @nogc
{
    immutable int width = image.w;
    immutable int height = image.h;

    // Sets the top byte of each 32-bit lane, which is the 4th byte of
    // each pixel in memory on little-endian targets.
    __m128i opaqueAlpha = _mm_set1_epi32(0xff000000);

    for (int row = 0; row < height; ++row)
    {
        ubyte* pixels = cast(ubyte*) image.scanline(row).ptr;

        // Vector part: 4 pixels (16 bytes) per iteration.
        int x = 0;
        while (x + 3 < width)
        {
            __m128i* block = cast(__m128i*)(pixels + 4 * x);
            __m128i rgba = _mm_or_si128(_mm_loadu_si128(block), opaqueAlpha);
            __m128i bgra;

            version(LDC)
            {
                import ldc.intrinsics;
                import ldc.simd;

                // Within each group of 4 bytes, exchange bytes 0 and 2 (red and blue).
                bgra = cast(__m128i) shufflevector!(byte16,  2,  1,  0,  3,
                                                             6,  5,  4,  7,
                                                            10,  9,  8, 11,
                                                            14, 13, 12, 15)(cast(byte16)rgba, cast(byte16)rgba);
            }
            else
            {
                // Widen the 16 bytes to two vectors of 8 x 16-bit, so that the
                // 16-bit shuffles can reorder components pixel by pixel.
                __m128i allZeroes = _mm_setzero_si128();
                __m128i lowPixels = _mm_unpacklo_epi8(rgba, allZeroes);
                __m128i highPixels = _mm_unpackhi_epi8(rgba, allZeroes);

                // Swap red and blue: destination components (0, 1, 2, 3)
                // take sources (2, 1, 0, 3).
                enum int exchangeRedBlue = _MM_SHUFFLE(3, 0, 1, 2);
                lowPixels = _mm_shufflelo_epi16!exchangeRedBlue(_mm_shufflehi_epi16!exchangeRedBlue(lowPixels));
                highPixels = _mm_shufflelo_epi16!exchangeRedBlue(_mm_shufflehi_epi16!exchangeRedBlue(highPixels));

                // Narrow back to 16 bytes.
                bgra = _mm_packus_epi16(lowPixels, highPixels);
            }

            _mm_storeu_si128(block, bgra);
            x += 4;
        }

        // Scalar part: pixels left over at the end of the row.
        while (x < width)
        {
            ubyte* p = pixels + 4 * x;
            immutable ubyte red = p[0];
            immutable ubyte green = p[1];
            immutable ubyte blue = p[2];
            p[0] = blue;
            p[1] = green;
            p[2] = red;
            p[3] = 255;
            ++x;
        }
    }
}
1605 
/// Writes 255 into the alpha byte (4th byte) of every pixel of `image`, in place.
/// The R, G and B bytes are left as they are: no component reordering happens.
/// Pixels are processed four at a time with SSE2 intrinsics, the remaining
/// (width % 4) pixels of each row go through a scalar loop.
void shuffleComponentsRGBA8ToRGBA8AndForceAlphaTo255(ImageRef!RGBA image) pure nothrow @nogc
{
    immutable int width = image.w;
    immutable int height = image.h;

    // Sets the top byte of each 32-bit lane, which is the 4th byte of
    // each pixel in memory on little-endian targets.
    __m128i opaqueAlpha = _mm_set1_epi32(0xff000000);

    for (int row = 0; row < height; ++row)
    {
        ubyte* pixels = cast(ubyte*) image.scanline(row).ptr;

        // Vector part: 4 pixels (16 bytes) per iteration.
        int x = 0;
        while (x + 3 < width)
        {
            __m128i* block = cast(__m128i*)(pixels + 4 * x);
            __m128i opaque = _mm_or_si128(_mm_loadu_si128(block), opaqueAlpha);
            _mm_storeu_si128(block, opaque);
            x += 4;
        }

        // Scalar part: pixels left over at the end of the row.
        while (x < width)
        {
            pixels[4 * x + 3] = 255;
            ++x;
        }
    }
}