1 /**
2 Defines `Vec`, `reallocBuffer` and memory functions.
3 
4 Copyright: Guillaume Piolat 2015-2016.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 Authors:   Guillaume Piolat
7 */
8 module dplug.core.vec;
9 
10 import std.traits: hasElaborateDestructor;
11 
12 import core.stdc.stdlib: malloc, free, realloc;
13 import core.stdc.string: memcpy, memmove;
14 
15 import core.exception;
16 import inteli.xmmintrin;
17 
18 
19 // This module deals with aligned memory.
20 // You'll also find here a non-copyable std::vector equivalent `Vec`.
21 
/// Allocates an aligned memory chunk.
/// Functionally equivalent to Visual C++ `_aligned_malloc`.
/// Do not mix allocations with different alignment.
/// Important: `alignedMalloc(0)` does not necessarily return `null`, and its result
///            _has_ to be freed with `alignedFree`.
///
/// Params:
///     size      = Number of usable bytes requested.
///     alignment = Requested alignment in bytes. Must not be 0. A value of 1 forwards the
///                 request directly to the C allocator.
///
/// Returns: Pointer to the allocated chunk. With `alignment == 1` this is whatever `malloc`
///          returned (possibly `null`). For other alignments an allocation failure is
///          reported through `onOutOfMemoryError`.
void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc
{
    assert(alignment != 0);

    // Short-cut and use the C allocator to avoid overhead if no alignment
    if (alignment == 1)
    {
        // C99:
        // Implementation-defined behavior
        // Whether the calloc, malloc, and realloc functions return a null pointer
        // or a pointer to an allocated object when the size requested is zero.
        // In any case, we'll have to free() it.
        return malloc(size);
    }

    size_t request = requestedSize(size, alignment);

    // `requestedSize` adds alignment padding and bookkeeping on top of `size` with wrapping
    // arithmetic. A total that ends up smaller than one of its inputs has wrapped around:
    // allocating it would hand out a chunk far smaller than what the caller asked for.
    if (request < size || request < alignment)
        onOutOfMemoryError();

    void* raw = malloc(request);

    if (request > 0 && raw == null) // malloc(0) can validly return anything
        onOutOfMemoryError();

    return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment);
}
50 
/// Releases a chunk previously obtained from `alignedMalloc` or `alignedRealloc`.
/// Functionally equivalent to Visual C++ `_aligned_free`.
/// Do not mix allocations with different alignment.
///
/// Params:
///     aligned   = Pointer to release. `null` is accepted and ignored.
///     alignment = Alignment that was used when the chunk was allocated.
void alignedFree(void* aligned, size_t alignment) nothrow @nogc
{
    // Without an alignment constraint the chunk came straight from the C allocator,
    // which also copes with a null pointer by itself.
    if (alignment == 1)
    {
        free(aligned);
        return;
    }

    if (aligned !is null)
    {
        assert(alignment != 0);
        assert(isPointerAligned(aligned, alignment));

        // The pointer originally returned by malloc is stored in the machine word
        // located just below the aligned address.
        void** rawSlot = (cast(void**) aligned) - 1;
        free(*rawSlot);
    }
}
70 
/// Resizes an aligned memory chunk obtained from `alignedMalloc` or `alignedRealloc`,
/// keeping its content.
/// Functionally equivalent to Visual C++ `_aligned_realloc`.
/// Do not mix allocations with different alignment.
/// Important: `alignedRealloc(p, 0)` does not necessarily return `null`, and its result
///            _has_ to be freed with `alignedFree`.
void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow @nogc
{
    enum bool keepContents = true;
    return alignedReallocImpl!keepContents(aligned, size, alignment);
}
80 
81 
/// Variant of `alignedRealloc` that makes no attempt to carry the previous content over
/// when the chunk has to be re-created.
void* alignedReallocDiscard(void* aligned, size_t size, size_t alignment) nothrow @nogc
{
    enum bool keepContents = false;
    return alignedReallocImpl!keepContents(aligned, size, alignment);
}
87 
88 
/// Checks the alignment of a pointer.
///
/// Params:
///     p         = Pointer to test.
///     alignment = Alignment in bytes, must not be 0. The test masks the address with
///                 `alignment - 1`, so it is only meaningful for power-of-two values.
///
/// Returns: `true` if the pointer is suitably aligned.
bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc
{
    assert(alignment != 0);
    const size_t address = cast(size_t) p;
    const size_t lowBits = address & (alignment - 1);
    return lowBits == 0;
}
unittest
{
    ubyte single;
    align(16) ubyte[5] block;
    assert(isPointerAligned(&single, 1));
    assert(!isPointerAligned(&block[1], 2));
    assert(isPointerAligned(&block[4], 4));
}
103 
/// Tells whether the byte ranges `a[0..a_size]` and `b[0..b_size]` share at least one byte.
///
/// Params:
///     a      = Start of the first range. May be `null`.
///     a_size = Length of the first range in bytes, must be >= 0.
///     b      = Start of the second range. May be `null`.
///     b_size = Length of the second range in bytes, must be >= 0.
///
/// Returns: `true` if the two ranges overlap. A `null` pointer or an empty range never
///          overlaps with anything.
bool isMemoryOverlapping(const(void)* a, ptrdiff_t a_size,
                         const(void)* b, ptrdiff_t b_size) pure nothrow @nogc @trusted
{
    assert(a_size >= 0 && b_size >= 0);

    if (a is null || b is null)
        return false;

    if (a_size == 0 || b_size == 0)
        return false;

    // Only addresses are compared, so constness of the pointee is kept intact.
    const(ubyte)* lA = cast(const(ubyte)*) a;
    const(ubyte)* hA = lA + a_size;
    const(ubyte)* lB = cast(const(ubyte)*) b;
    const(ubyte)* hB = lB + b_size;

    // Two non-empty half-open ranges lA..hA and lB..hB intersect exactly when each one
    // starts before the other one ends.
    return lA < hB && lB < hA;
}

///ditto
bool isMemoryOverlapping(const(void)[] a, const(void)[] b) pure nothrow @nogc @trusted
{
    return isMemoryOverlapping(a.ptr, a.length, b.ptr, b.length);
}
unittest
{
    ubyte[100] a;
    assert(!isMemoryOverlapping(null, a));
    assert(!isMemoryOverlapping(a, null));
    assert(!isMemoryOverlapping(a[1..1], a[0..10]));
    assert(!isMemoryOverlapping(a[1..10], a[10..100]));
    assert(!isMemoryOverlapping(a[30..100], a[0..30]));
    assert(isMemoryOverlapping(a[1..50], a[49..100]));
    assert(isMemoryOverlapping(a[49..100], a[1..50]));
    assert(isMemoryOverlapping(a[40..45], a[30..55]));
    assert(isMemoryOverlapping(a[30..55], a[40..45]));
}
148 
149 private nothrow @nogc
150 {
151     void* alignedReallocImpl(bool PreserveDataIfResized)(void* aligned, size_t size, size_t alignment)
152     {
153         // Short-cut and use the C allocator to avoid overhead if no alignment
154         if (alignment == 1)
155         {
156             // C99:
157             // Implementation-defined behavior
158             // Whether the calloc, malloc, and realloc functions return a null pointer
159             // or a pointer to an allocated object when the size requested is zero.
160             // In any case, we'll have to `free()` it.
161             return realloc(aligned, size);
162         }
163 
164         if (aligned is null)
165             return alignedMalloc(size, alignment);
166 
167         assert(alignment != 0);
168         assert(isPointerAligned(aligned, alignment));
169 
170         size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);
171 
172         void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
173         size_t request = requestedSize(size, alignment);
174         size_t previousRequest = requestedSize(previousSize, alignment);
175         assert(previousRequest - request == previousSize - size); // same alignment
176 
177         // Heuristic: if a requested size is within 50% to 100% of what is already allocated
178         //            then exit with the same pointer
179         if ( (previousRequest < request * 4) && (request <= previousRequest) )
180             return aligned;
181 
182         void* newRaw = malloc(request);
183         static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014
184         {
185             if (request > 0 && newRaw == null) // realloc(0) can validly return anything
186                 onOutOfMemoryError();
187         }
188 
189         void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, size, alignment);
190 
191         static if (PreserveDataIfResized)
192         {
193             size_t minSize = size < previousSize ? size : previousSize;
194             memcpy(newAligned, aligned, minSize); // memcpy OK
195         }
196 
197         // Free previous data
198         alignedFree(aligned, alignment);
199         assert(isPointerAligned(newAligned, alignment));
200         return newAligned;
201     }
202 
203     /// Returns: next pointer aligned with alignment bytes.
204     void* nextAlignedPointer(void* start, size_t alignment) pure
205     {
206         import dplug.core.math : nextMultipleOf;
207         return cast(void*)nextMultipleOf(cast(size_t)(start), alignment);
208     }
209 
210     // Returns number of bytes to actually allocate when asking
211     // for a particular alignement
212     size_t requestedSize(size_t askedSize, size_t alignment) pure
213     {
214         enum size_t pointerSize = size_t.sizeof;
215         return askedSize + alignment - 1 + pointerSize * 2;
216     }
217 
218     // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it)
219     void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment)
220     {
221         enum size_t pointerSize = size_t.sizeof;
222         char* start = cast(char*)raw + pointerSize * 2;
223         void* aligned = nextAlignedPointer(start, alignment);
224         void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
225         *rawLocation = raw;
226         size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
227         *sizeLocation = size;
228         assert( isPointerAligned(aligned, alignment) );
229         return aligned;
230     }
231 }
232 
// Exercises `alignedMalloc`, `alignedRealloc` and `alignedFree` with a 16-byte alignment.
unittest
{
    // A plain allocation is non-null and 16-byte aligned.
    {
        void* p = alignedMalloc(23, 16);
        assert(p !is null);
        assert(((cast(size_t)p) & 0xf) == 0);

        alignedFree(p, 16);
    }

    // Zero-sized aligned requests still yield a non-null pointer, which has to be freed.
    void* nullAlloc = alignedMalloc(0, 16);
    assert(nullAlloc != null);
    nullAlloc = alignedRealloc(nullAlloc, 0, 16);
    assert(nullAlloc != null);
    alignedFree(nullAlloc, 16);

    {
        int alignment = 16;
        int* p = null;

        // check if growing keep values in place
        foreach(int i; 0..100)
        {
            p = cast(int*) alignedRealloc(p, (i + 1) * int.sizeof, alignment);
            p[i] = i;
        }

        foreach(int i; 0..100)
            assert(p[i] == i);

        // Shrinking to zero bytes still returns a pointer to free.
        p = cast(int*) alignedRealloc(p, 0, alignment);
        assert(p !is null);

        alignedFree(p, alignment);
    }

    // Verify that same size alloc preserve pointer.
    {
        void* p = null;
        p = alignedRealloc(p, 254, 16);
        void* p2 = alignedRealloc(p, 254, 16);
        assert(p == p2);

        // Test shrink heuristic: a moderately smaller request keeps the same chunk.
        void* p3 = alignedRealloc(p, 128, 16);
        assert(p == p3);
        alignedFree(p3, 16);
    }
}
282 
283 
284 
/// Slice-based counterpart of `alignedRealloc`, used throughout dplug:dsp to avoid
/// reliance on GC.
/// Important: A length of 0 is a special case that frees the slice.
/// You MUST use a consistent alignment throughout the lifetime of this buffer.
///
/// Params:
///    buffer = Existing allocated buffer. Can be null.
///             Input slice length is not considered.
///             On return it designates the new storage, or is `null` if the buffer was
///             freed or if the underlying reallocation returned `null`.
///    length = Desired slice length, in elements.
///    alignment = Alignment if the slice has allocation requirements, 1 else.
///                Must match for deallocation.
///
/// Example:
/// ---
/// import std.stdio;
///
/// struct MyDSP
/// {
/// nothrow @nogc:
///
///     void initialize(int maxFrames)
///     {
///         // mybuf points to maxFrames frames
///         mybuf.reallocBuffer(maxFrames);
///     }
///
///     ~this()
///     {
///         // If you don't free the buffer, it will leak.
///         mybuf.reallocBuffer(0);
///     }
///
/// private:
///     float[] mybuf;
/// }
/// ---
void reallocBuffer(T)(ref T[] buffer, size_t length, int alignment = 1) nothrow @nogc
{
    static if (is(T == struct) && hasElaborateDestructor!T)
    {
        static assert(false); // struct with destructors not supported
    }

    if (length != 0)
    {
        void* chunk = alignedRealloc(buffer.ptr, T.sizeof * length, alignment);
        T* typed = cast(T*) chunk;

        // alignment 1 can still return null
        buffer = (typed is null) ? null : typed[0..length];
    }
    else
    {
        // Size 0 is special-case to free the slice.
        alignedFree(buffer.ptr, alignment);
        buffer = null;
    }
}
unittest
{
    int[] samples;
    samples.reallocBuffer(15);
    assert(samples.length == 15);
    samples.reallocBuffer(0);
    assert(samples.length == 0);
}
350 
351 
/// Convenience factory for `Vec`.
///
/// Params:
///     initialSize = Number of elements the vector holds right after creation.
///     alignment   = Alignment in bytes of the underlying storage, 1 for no constraint.
///
/// Returns: A newly created `Vec`.
Vec!T makeVec(T)(size_t initialSize = 0, int alignment = 1) nothrow @nogc
{
    alias Container = Vec!T;
    return Container(initialSize, alignment);
}
357 
/// Kind of a std::vector replacement.
/// Grow-only array, points to a (optionally aligned) memory location.
/// This can also work as an output range.
/// `Vec` is designed to work even when uninitialized, without `makeVec`.
/// Warning: it is pretty barebones, doesn't respect T.init or call destructors.
///          When used in a GC program, GC roots won't be registered.
struct Vec(T)
{
nothrow:
@nogc:

    static if (is(T == struct) && hasElaborateDestructor!T)
    {
        pragma(msg, "WARNING! struct with destructors were never meant to be supported in Vec!T. This will be removed in Dplug v15.");
    }

    public
    {
        /// Creates an aligned buffer with given initial size.
        ///
        /// Params:
        ///     initialSize = Number of elements held right after construction. Their
        ///                   content is not initialized.
        ///     alignment   = Alignment in bytes of the storage, must not be 0.
        this(size_t initialSize, int alignment) @safe
        {
            assert(alignment != 0);
            _size = 0;
            _allocated = 0;
            _data = null;
            _alignment = alignment;
            resizeExactly(initialSize);
        }

        /// Frees the storage, if any.
        ~this() @trusted
        {
            if (_data !is null)
            {
                alignedFree(_data, _alignment);
                _data = null;
                _allocated = 0;
            }
        }

        // The storage is owned by exactly one `Vec`.
        @disable this(this);

        /// Returns: Length of buffer in elements.
        size_t length() pure const @safe
        {
            return _size;
        }

        /// Return: Allocated size of the underlying array.
        size_t capacity() pure const @safe
        {
            return _allocated;
        }

        /// Returns: Length of buffer in elements.
        alias opDollar = length;

        /// Resizes a buffer to hold $(D askedSize) elements.
        /// The allocated capacity is never reduced, new elements are left uninitialized.
        void resize(size_t askedSize) @trusted
        {
            resizeExactly(askedSize);
        }

        /// Pop last element.
        /// Returns: The element that was removed. The buffer must not be empty.
        T popBack() @trusted
        {
            assert(_size > 0);
            _size = _size - 1;
            return _data[_size];
        }

        /// Append an element to this buffer.
        void pushBack(T x) @trusted
        {
            size_t i = _size;
            resizeGrow(_size + 1);
            _data[i] = x;
        }

        // DMD 2.088 deprecates the old D1-operators
        static if (__VERSION__ >= 2088)
        {
            ///ditto
            void opOpAssign(string op)(T x) @safe if (op == "~")
            {
                pushBack(x);
            }
        }
        else
        {
            ///ditto
            void opCatAssign(T x) @safe
            {
                pushBack(x);
            }
        }

        // Output range support
        alias put = pushBack;

        /// Finds an item, returns -1 if not found
        int indexOf(T x) @trusted
        {
            static if (__traits(isStaticArray, T))
            {
                // static arrays are compared by content, comparing them by identity
                // (as slices) is not what we want.
                foreach(int i; 0..cast(int)_size)
                    if (_data[i] == x)
                        return i;
            }
            else
            {
                // base types: identity is equality
                // reference types: looking for identity
                foreach(int i; 0..cast(int)_size)
                    if (_data[i] is x)
                        return i;
            }
            return -1;
        }

        /// Removes an item and replaces it by the last item.
        /// Warning: this reorders the array.
        void removeAndReplaceByLastElement(size_t index) @trusted
        {
            assert(index < _size);
            _data[index] = _data[--_size];
        }

        /// Removes an item and shift the rest of the array to front by 1.
        /// The length is reduced by one.
        /// Warning: O(N) complexity.
        void removeAndShiftRestOfArray(size_t index) @trusted
        {
            assert(index < _size);
            for (; index + 1 < _size; ++index)
                _data[index] = _data[index+1];

            // One item is gone: without this the last element would stay duplicated
            // at the end of the array.
            --_size;
        }

        /// Appends another buffer to this buffer.
        /// `other` is allowed to be this very buffer, in which case its content is doubled.
        void pushBack(ref Vec other) @trusted
        {
            size_t oldSize = _size;

            // Must be read before growing: if `other` is `this`, `resizeGrow` changes it.
            size_t count = other._size;
            if (count == 0)
                return;

            resizeGrow(oldSize + count);

            // `other._data` is read after the resize on purpose, so that a self-append
            // copies from the possibly relocated storage.
            memmove(_data + oldSize, other._data, T.sizeof * count);
        }

        /// Appends a slice to this buffer.
        /// `slice` should not belong to the same buffer _data.
        void pushBack(T[] slice) @trusted
        {
            size_t oldSize = _size;
            size_t newSize = _size + slice.length;
            resizeGrow(newSize);
            for (size_t n = 0; n < slice.length; ++n)
                _data[oldSize + n] = slice[n];
        }

        /// Returns: Raw pointer to data.
        @property inout(T)* ptr() inout @system
        {
            return _data;
        }

        /// Returns: n-th element. No bounds check is performed.
        ref inout(T) opIndex(size_t i) pure inout @trusted
        {
            return _data[i];
        }

        /// Assigns the n-th element. No bounds check is performed.
        T opIndexAssign(T x, size_t i) pure @trusted
        {
            return _data[i] = x;
        }

        /// Sets size to zero, but keeps allocated buffers.
        void clearContents() pure @safe
        {
            _size = 0;
        }

        /// Returns: Whole content of the array in one slice.
        inout(T)[] opSlice() inout @safe
        {
            return opSlice(0, length());
        }

        /// Returns: A slice of the array. No bounds check is performed.
        inout(T)[] opSlice(size_t i1, size_t i2) inout @trusted
        {
            return _data[i1 .. i2];
        }

        /// Fills the buffer with the same value.
        void fill(T x) @trusted
        {
            _data[0.._size] = x;
        }

        /// Move. Give up owner ship of the data.
        /// Only valid for a `Vec` with alignment 1. The `Vec` is left empty and can be
        /// used again afterwards.
        T[] releaseData() @system
        {
            T[] data = _data[0.._size];
            assert(_alignment == 1); // else would need to be freed with alignedFree.
            this._data = null;
            this._size = 0;
            this._allocated = 0;

            // Back to the default alignment (which is also the only one accepted above).
            // An alignment of 0 is invalid for the allocation functions and would make any
            // further growth of this `Vec` fail.
            this._alignment = 1;
            return data;
        }
    }

    private
    {
        size_t _size = 0;        // number of elements in use
        T* _data = null;         // storage, null while nothing is allocated
        size_t _allocated = 0;   // capacity of the storage, in elements
        size_t _alignment = 1; // for an unaligned Vec, you probably are not interested in alignment

        /// Used internally to grow in response to a pushBack operation.
        /// Different heuristic, since we know that the resize is likely to be repeated for an
        /// increasing size later.
        void resizeGrow(size_t askedSize) @trusted
        {
            if (_allocated < askedSize)
            {
                size_t newCap = computeNewCapacity(askedSize, _size);
                setCapacity(newCap);
            }
            _size = askedSize;
        }

        // Resizes the `Vector` to hold exactly `askedSize` elements.
        // Still if the allocated capacity is larger, do nothing.
        void resizeExactly(size_t askedSize) @trusted
        {
            if (_allocated < askedSize)
            {
                setCapacity(askedSize);
            }
            _size = askedSize;
        }

        // Internal use, realloc internal buffer, copy existing items.
        // Doesn't initialize the new ones.
        void setCapacity(size_t cap)
        {
            size_t numBytes = cap * T.sizeof;
            _data = cast(T*)(alignedRealloc(_data, numBytes, _alignment));
            _allocated = cap;
        }

        // Compute new capacity, while growing.
        // `oldLength` is currently not used by the computation.
        size_t computeNewCapacity(size_t newLength, size_t oldLength)
        {
            // Optimal value (Windows malloc) not far from there.
            enum size_t PAGESIZE = 4096;

            size_t newLengthBytes = newLength * T.sizeof;
            if (newLengthBytes > PAGESIZE)
            {
                // Larger arrays need a smaller growth factor to avoid wasting too much bytes.
                // This was found when tracing curve, not too far from golden ratio for some reason.
                return newLength + newLength / 2 + newLength / 8;
            }
            else
            {
                // For smaller arrays being pushBack, can bring welcome speed by minimizing realloc.
                return newLength * 3;
            }
        }
    }
}
639 
// Exercises `Vec`: output range conformance, content preservation across random resizes,
// and the `pushBack` overloads.
unittest
{
    import std.range.primitives;
    static assert(isOutputRange!(Vec!ubyte, ubyte));


    import std.random;
    int NBUF = 200;

    // Fixed seed, so that the sequence of sizes is the same on every run.
    Xorshift32 rng;
    rng.seed(0xBAADC0DE);

    struct box2i { int a, b, c, d; }
    Vec!box2i[] boxes;
    boxes.length = NBUF;

    foreach(i; 0..NBUF)
    {
        boxes[i] = makeVec!box2i();
    }

    foreach(j; 0..200)
    {
        foreach(i; 0..NBUF)
        {
            int previousSize = cast(int)(boxes[i].length);
            void* previousPtr = boxes[i].ptr;

            // Write a recognizable pattern in every element before resizing.
            foreach(int k; 0..cast(int)(boxes[i].length))
                boxes[i][k] = box2i(k, k, k, k);

            int newSize = uniform(0, 100, rng);
            boxes[i].resize(newSize);

            // Elements below both the old and the new length must have survived.
            int minSize = previousSize;
            if (minSize > boxes[i].length)
                minSize = cast(int)(boxes[i].length);

            void* newPtr = boxes[i].ptr;
            foreach(int k; 0..minSize)
            {
                box2i item = boxes[i][k];
                box2i shouldBe = box2i(k, k, k, k);
                assert(item == shouldBe);
            }

            // Read every element of the resized vector.
            int sum = 0;
            foreach(k; 0..newSize)
            {
                box2i bb = boxes[i][k];
                sum += bb.a + bb.b + bb.c + bb.d;
            }
        }
    }

    foreach(i; 0..NBUF)
        boxes[i].destroy();

    {
        auto buf = makeVec!int;
        enum N = 10;
        buf.resize(N);
        foreach(i ; 0..N)
            buf[i] = i;

        foreach(i ; 0..N)
            assert(buf[i] == i);

        auto buf2 = makeVec!int;
        buf2.pushBack(11);
        buf2.pushBack(14);

        // test pushBack(slice)
        buf.clearContents();
        buf.pushBack(buf2[]);
        assert(buf[0] == 11);
        assert(buf[1] == 14);

        // test pushBack(Vec)
        buf2[1] = 8;
        buf.clearContents();
        buf.pushBack(buf2);
        assert(buf[0] == 11);
        assert(buf[1] == 8);
    }
}
725 
// Vec should work without any initialization
unittest
{
    Vec!string vec;

    // Iterating a default-initialized Vec visits nothing.
    foreach(e; vec[])
    {        
    }

    assert(vec.length == 0);
    vec.clearContents();
    vec.resize(0);

    // None of the operations above changed the state.
    assert(vec == vec.init);

    // Filling an empty Vec does not allocate.
    vec.fill("filler");
    assert(vec.ptr is null);
}
742 
// Issue #312: vec.opIndex not returning ref which break struct assignment
unittest
{
    static struct A
    {
        int x;
    }
    Vec!A vec = makeVec!A();
    A a;

    // Append through both the method and the `~=` operator.
    vec.pushBack(a);
    vec ~= a;
    vec[0].x = 42; // vec[0] needs to return a ref
    assert(vec[0].x == 42);
}
757 
758 
759 
/// Allows to merge the allocation of several arrays, which saves allocation count and can speed up things thanks to locality.
///
/// Usage is in two passes: `start()`, then a first series of `allocArray`/`alloc` calls to
/// measure the needed size, then `allocate()`, then exactly the same series of
/// `allocArray`/`alloc` calls to get the actual areas.
///
/// Example: see below unittest.
struct MergedAllocation
{
nothrow:
@nogc:

    // In debug mode, add stomp detection to `MergedAllocation`.
    // This takes additional complexity to coarse check for stomping nearby buffers.
    debug
    {
        private enum mergedAllocStompWarning = true;
    }
    else
    {
        private enum mergedAllocStompWarning = false;
    }


    // This adds a sentinel area after each allocation,
    // and check at the end of the program that they were unaffected.
    static if (mergedAllocStompWarning)
    {
        // Number of bytes to write between areas to check for stomping.
        enum SENTINEL_BYTES = 32;
    }

    /// Largest alignment that can be asked to `allocArray` and `alloc`.
    enum maxExpectedAlignment = 32;

    /// Start defining the area of allocations.
    void start()
    {
        // Detect memory errors in former uses.
        static if (mergedAllocStompWarning)
        {
            checkSentinelAreas();
            _allocateWasCalled = false;
        }

        // Counting pass: the bump pointer starts from address zero, so that its final
        // value is the number of bytes needed.
        _base = cast(ubyte*)(cast(size_t)0);
    }

    /// Allocate (or count) space needed for `numElems` elements of type `T` with given alignment.
    /// This gets called twice for each array, see example for usage.
    ///
    /// This bumps the internal bump allocator.
    /// Giving null to this chain and converting the result to size_t give the total needed size
    /// for the merged allocation.
    ///
    /// Params:
    ///     array     = Receives the area.
    ///     numElems  = Number of elements in the area.
    ///     alignment = Alignment in bytes of the area. Must be a power of two, not larger
    ///                 than `maxExpectedAlignment`.
    ///
    /// Warning:
    ///          - If called after a `start()` call, the area returned are wrong and are only for
    ///             counting needed bytes. Don't use those.
    ///
    ///          - If called after an `allocate()` call, the area returned are an actual merged
    ///            allocation (if the same calls are done).
    ///
    /// Warning: Prefer `allocArray` over `alloc` variant, since the extra length field WILL help
    ///          you catch memory errors before release. Else it is very common to miss buffer
    ///          overflows in samplerate changes.
    void allocArray(T)(out T[] array, size_t numElems, size_t alignment = 1)
    {
        assert(alignment <= maxExpectedAlignment);
        assert( (alignment != 0) && ((alignment & (alignment - 1)) == 0)); // power of two

        size_t adr = cast(size_t) _base;

        // 1. Align base address
        size_t mask = ~(alignment - 1);
        adr = (adr + alignment - 1) & mask;

        // 2. Assign array and base.
        array = (cast(T*)adr)[0..numElems];
        adr += T.sizeof * numElems;
        _base = cast(ubyte*) adr;

        static if (mergedAllocStompWarning)
        {
            // Sentinels can only be written in actual memory, not during the counting pass.
            if (_allocateWasCalled && _allocation !is null)
            {
                // Each allocated area followed with SENTINEL_BYTES bytes of value 0xCC
                _base[0..SENTINEL_BYTES] = 0xCC;
                registerSentinel(_base);
            }

            // The room for the sentinel is reserved in both passes, to keep the same layout.
            _base += SENTINEL_BYTES;
        }
    }

    ///ditto
    void alloc(T)(out T* array, size_t numElems, size_t alignment = 1)
    {
        T[] arr;
        allocArray(arr, numElems, alignment);
        array = arr.ptr;
    }

    /// Allocate actual storage for the merged allocation. From there, you need to define exactly the same area with `alloc` and `allocArray`.
    /// This time they will get a proper value.
    void allocate()
    {
        static if (mergedAllocStompWarning) _allocateWasCalled = true;

        size_t sizeNeeded =  cast(size_t)_base; // since it was fed 0 at start.

        if (sizeNeeded == 0)
        {
            // If no bytes are requested, it means no buffer were requested, or only with zero size.
            // We will return a null pointer in that case, since accessing them would be illegal anyway.
            // A storage remaining from a former use has to be released first, else it would
            // be leaked when the pointer is overwritten.
            if (_allocation !is null)
                _mm_free(_allocation);
            _allocation = null;
        }
        else
        {
            // the merged allocation needs to have the largest expected alignment, else the size could depend on the hazards
            // of the allocation. With maximum alignment, padding is the same so long as areas have smaller or equal alignment requirements.
            _allocation = cast(ubyte*) _mm_realloc(_allocation, sizeNeeded, maxExpectedAlignment);
        }

        // So that the next layout call points to the right area.
        _base = _allocation;
    }

    /// Checks the sentinels (stomp detection only) and frees the storage, if any.
    ~this()
    {
        static if (mergedAllocStompWarning)
        {
            checkSentinelAreas();
        }

        if (_allocation != null)
        {
            _mm_free(_allocation);
            _allocation = null;
        }
    }

private:

    // Location of the allocation.
    ubyte* _allocation = null;

    // Bump pointer: where the next area would start, before alignment.
    ubyte* _base = null;

    static if (mergedAllocStompWarning)
    {
        // true between a call to `allocate()` and the next call to `start()`.
        bool _allocateWasCalled = false;

        Vec!(ubyte*) _sentinels; // start of sentinel area (SENTINEL_BYTES long)

        // Remembers the start of a sentinel area for later checking.
        void registerSentinel(ubyte* start)
        {
            _sentinels.pushBack(start);
        }

        bool hasMemoryError() // true if sentinel bytes stomped
        {
            assert(_allocateWasCalled && _allocation !is null);

            foreach(ubyte* s; _sentinels[])
            {
                for (int n = 0; n < SENTINEL_BYTES; ++n)
                {
                    if (s[n] != 0xCC)
                        return true;
                }
            }
            return false;
        }

        // Check existing sentinels, and unregister them
        void checkSentinelAreas()
        {
            if (!_allocateWasCalled)
                return; // still haven't done an allocation, nothing to check.

            if (_allocation is null)
                return; // nothing to check

            // If you fail here, there is a memory error in your access patterns.
            // Sentinel bytes of value 0xCC were overwritten in a `MergedAllocation`.
            // You can use slices with `allocArray` instead of `alloc` to find the faulty
            // access. This check doesn't catch everything!
            assert(! hasMemoryError());

            _sentinels.clearContents();
        }
    }
}
948 
949 
// Here is how you should use MergedAllocation. 
unittest
{
    static struct MyDSPStruct
    {
    public:
    nothrow:
    @nogc:
        // (Re)creates all the buffers for the given maximum number of frames.
        void initialize(int maxFrames)
        {
            _mergedAlloc.start();
            layout(_mergedAlloc, maxFrames); // you need such a layout function to be called twice.
            _mergedAlloc.allocate();
            layout(_mergedAlloc, maxFrames); // the first time arrays area allocated in the `null` area, the second time in
                                             // actually allocated memory (since we now have the needed length).
        }
    
        // Describes every area of the merged allocation. Must perform the same calls in both passes.
        void layout(ref MergedAllocation ma, int maxFrames)
        {
            // allocate `maxFrames` elems, and return a slice in `_intermediateBuf`.
            ma.allocArray(_intermediateBuf, maxFrames); 

            // allocate `maxFrames` elems, aligned to 16-byte boundaries. Return a pointer to that in `_coeffs`.
            ma.alloc(_coeffs, maxFrames, 16);
        }

    private:
        float[] _intermediateBuf;
        double* _coeffs;
        MergedAllocation _mergedAlloc;
    }

    MyDSPStruct s;
    s.initialize(14);
    s._coeffs[0..14] = 1.0f;
    s._intermediateBuf[0..14] = 1.0f;

    // Calling `initialize` again with another size is supported.
    s.initialize(17);
    s._coeffs[0..17] = 1.0f;
    s._intermediateBuf[0..17] = 1.0f;
}
990 
// Should be valid to allocate nothing with a MergedAllocation.
unittest
{
    MergedAllocation ma;
    ma.start();
    ma.allocate();

    // No area was requested, hence no storage was created.
    assert(ma._allocation == null);
}
999 
// Two-pass allocation of two arrays, and detection of a write past the end of an array
// when stomp detection is enabled.
unittest
{
    MergedAllocation ma;
    ma.start();
    ubyte[] arr, arr2;

    // Counting pass: nothing is allocated yet.
    ma.allocArray(arr, 17);
    ma.allocArray(arr2, 24);
    assert(ma._allocation == null);

    // Actual pass: same calls, this time the slices designate allocated memory.
    ma.allocate();
    ma.allocArray(arr, 17);
    ma.allocArray(arr2, 24);
    assert(ma._allocation != null);
    assert(arr.length == 17);
    assert(arr2.length == 24);

    // Test memory error detection with a simple case.
    static if (MergedAllocation.mergedAllocStompWarning)
    {
        assert(!ma.hasMemoryError());

        // Create a memory error: index 18 is past the 17 elements of `arr`,
        // inside the sentinel area that follows it.
        arr.ptr[18] = 2;
        assert(ma.hasMemoryError());
        arr.ptr[18] = 0xCC; // undo the error to avoid stopping the unittests
    }
}