1 /**
2 Defines `Vec`, `reallocBuffer` and memory functions.
3 
4 Copyright: Guillaume Piolat 2015-2016.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 Authors:   Guillaume Piolat
7 */
8 module dplug.core.vec;
9 
10 import std.traits: hasElaborateDestructor;
11 
12 import core.stdc.stdlib: malloc, free, realloc;
13 import core.stdc.string: memcpy, memmove;
14 
15 import core.exception;
16 import inteli.xmmintrin;
17 
18 
19 // This module deals with aligned memory.
20 // You'll also find here a non-copyable std::vector equivalent `Vec`.
21 
22 /// Allocates an aligned memory chunk.
23 /// Functionally equivalent to Visual C++ _aligned_malloc.
24 /// Do not mix allocations with different alignment.
25 /// Important: `alignedMalloc(0)` does not necessarily return `null`, and its result 
26 ///            _has_ to be freed with `alignedFree`.
27 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc
28 {
29     assert(alignment != 0);
30 
31     // Short-cut and use the C allocator to avoid overhead if no alignment
32     if (alignment == 1)
33     {
34         // C99:
35         // Implementation-defined behavior
36         // Whether the calloc, malloc, and realloc functions return a null pointer
37         // or a pointer to an allocated object when the size requested is zero.
38         // In any case, we'll have to free() it.
39         return malloc(size);
40     }
41 
42     size_t request = requestedSize(size, alignment);
43     void* raw = malloc(request);
44 
45     if (request > 0 && raw == null) // malloc(0) can validly return anything
46         onOutOfMemoryError();
47 
48     return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment);
49 }
50 
51 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc.
52 /// Functionally equivalent to Visual C++ _aligned_free.
53 /// Do not mix allocations with different alignment.
54 void alignedFree(void* aligned, size_t alignment) nothrow @nogc
55 {
56     // Short-cut and use the C allocator to avoid overhead if no alignment
57     if (alignment == 1)
58         return free(aligned);
59 
60     // support for free(NULL)
61     if (aligned is null)
62         return;
63 
64     assert(alignment != 0);
65     assert(isPointerAligned(aligned, alignment));
66 
67     void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof);
68     free(*rawLocation);
69 }
70 
71 /// Reallocates an aligned memory chunk allocated by `alignedMalloc` or `alignedRealloc`.
72 /// Functionally equivalent to Visual C++ `_aligned_realloc`.
73 /// Do not mix allocations with different alignment.
74 /// Important: `alignedRealloc(p, 0)` does not necessarily return `null`, and its result 
75 ///            _has_ to be freed with `alignedFree`.
76 void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow @nogc
77 {
78     return alignedReallocImpl!true(aligned, size, alignment);
79 }
80 
81 
82 /// Same as `alignedRealloc` but does not preserve data.
83 void* alignedReallocDiscard(void* aligned, size_t size, size_t alignment) nothrow @nogc
84 {
85     return alignedReallocImpl!false(aligned, size, alignment);
86 }
87 
88 
89 /// Returns: `true` if the pointer is suitably aligned.
90 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc
91 {
92     assert(alignment != 0);
93     return ( cast(size_t)p & (alignment - 1) ) == 0;
94 }
95 unittest
96 {
97     ubyte b;
98     align(16) ubyte[5] c;
99     assert(isPointerAligned(&b, 1));
100     assert(!isPointerAligned(&c[1], 2));
101     assert(isPointerAligned(&c[4], 4));
102 }
103 
104 /// Does memory slices a[0..a_size] and b[0..b_size] have an overlapping byte?
105 bool isMemoryOverlapping(const(void)* a, ptrdiff_t a_size, 
106                          const(void)* b, ptrdiff_t b_size) pure @trusted
107 {
108     assert(a_size >= 0 && b_size >= 0);
109 
110     if (a is null || b is null)
111         return false;
112 
113     if (a_size == 0 || b_size == 0)
114         return false;
115 
116     ubyte* lA = cast(ubyte*)a;
117     ubyte* hA = lA + a_size;
118     ubyte* lB = cast(ubyte*)b;
119     ubyte* hB = lB + b_size;
120 
121     // There is overlapping, if lA is inside lB..hB, or lB is inside lA..hA
122 
123     if (lA >= lB && lA < hB)
124         return true;
125 
126     if (lB >= lA && lB < hA)
127         return true;
128 
129     return false;
130 }
131 bool isMemoryOverlapping(const(void)[] a, const(void)[] b) pure @trusted
132 {
133     return isMemoryOverlapping(a.ptr, a.length, b.ptr, b.length);
134 }
135 unittest
136 {
137     ubyte[100] a;
138     assert(!isMemoryOverlapping(null, a));
139     assert(!isMemoryOverlapping(a, null));
140     assert(!isMemoryOverlapping(a[1..1], a[0..10]));
141     assert(!isMemoryOverlapping(a[1..10], a[10..100]));
142     assert(!isMemoryOverlapping(a[30..100], a[0..30]));
143     assert(isMemoryOverlapping(a[1..50], a[49..100]));
144     assert(isMemoryOverlapping(a[49..100], a[1..50]));
145     assert(isMemoryOverlapping(a[40..45], a[30..55]));
146     assert(isMemoryOverlapping(a[30..55], a[40..45]));
147 }
148 
149 private nothrow @nogc
150 {
151     void* alignedReallocImpl(bool PreserveDataIfResized)(void* aligned, size_t size, size_t alignment)
152     {
153         // Short-cut and use the C allocator to avoid overhead if no alignment
154         if (alignment == 1)
155         {
156             // C99:
157             // Implementation-defined behavior
158             // Whether the calloc, malloc, and realloc functions return a null pointer
159             // or a pointer to an allocated object when the size requested is zero.
160             // In any case, we'll have to `free()` it.
161             return realloc(aligned, size);
162         }
163 
164         if (aligned is null)
165             return alignedMalloc(size, alignment);
166 
167         assert(alignment != 0);
168         assert(isPointerAligned(aligned, alignment));
169 
170         size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);
171 
172         void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
173         size_t request = requestedSize(size, alignment);
174         size_t previousRequest = requestedSize(previousSize, alignment);
175         assert(previousRequest - request == previousSize - size); // same alignment
176 
177         // Heuristic: if a requested size is within 50% to 100% of what is already allocated
178         //            then exit with the same pointer
179         if ( (previousRequest < request * 4) && (request <= previousRequest) )
180             return aligned;
181 
182         void* newRaw = malloc(request);
183         static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014
184         {
185             if (request > 0 && newRaw == null) // realloc(0) can validly return anything
186                 onOutOfMemoryError();
187         }
188 
189         void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, size, alignment);
190 
191         static if (PreserveDataIfResized)
192         {
193             size_t minSize = size < previousSize ? size : previousSize;
194             memcpy(newAligned, aligned, minSize); // memcpy OK
195         }
196 
197         // Free previous data
198         alignedFree(aligned, alignment);
199         assert(isPointerAligned(newAligned, alignment));
200         return newAligned;
201     }
202 
203     /// Returns: next pointer aligned with alignment bytes.
204     void* nextAlignedPointer(void* start, size_t alignment) pure
205     {
206         import dplug.core.math : nextMultipleOf;
207         return cast(void*)nextMultipleOf(cast(size_t)(start), alignment);
208     }
209 
210     // Returns number of bytes to actually allocate when asking
211     // for a particular alignement
212     size_t requestedSize(size_t askedSize, size_t alignment) pure
213     {
214         enum size_t pointerSize = size_t.sizeof;
215         return askedSize + alignment - 1 + pointerSize * 2;
216     }
217 
218     // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it)
219     void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment)
220     {
221         enum size_t pointerSize = size_t.sizeof;
222         char* start = cast(char*)raw + pointerSize * 2;
223         void* aligned = nextAlignedPointer(start, alignment);
224         void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
225         *rawLocation = raw;
226         size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
227         *sizeLocation = size;
228         assert( isPointerAligned(aligned, alignment) );
229         return aligned;
230     }
231 }
232 
233 unittest
234 {
235     {
236         void* p = alignedMalloc(23, 16);
237         assert(p !is null);
238         assert(((cast(size_t)p) & 0xf) == 0);
239 
240         alignedFree(p, 16);
241     }
242 
243     void* nullAlloc = alignedMalloc(0, 16);
244     assert(nullAlloc != null);
245     nullAlloc = alignedRealloc(nullAlloc, 0, 16);
246     assert(nullAlloc != null);
247     alignedFree(nullAlloc, 16);
248 
249     {
250         int alignment = 16;
251         int* p = null;
252 
253         // check if growing keep values in place
254         foreach(int i; 0..100)
255         {
256             p = cast(int*) alignedRealloc(p, (i + 1) * int.sizeof, alignment);
257             p[i] = i;
258         }
259 
260         foreach(int i; 0..100)
261             assert(p[i] == i);
262 
263         p = cast(int*) alignedRealloc(p, 0, alignment);
264         assert(p !is null);
265 
266         alignedFree(p, alignment);
267     }
268 
269     // Verify that same size alloc preserve pointer. 
270     {
271         void* p = null;
272         p = alignedRealloc(p, 254, 16);
273         void* p2 = alignedRealloc(p, 254, 16);
274         assert(p == p2);
275 
276         // Test shrink heuristic
277         void* p3 = alignedRealloc(p, 128, 16);
278         assert(p == p3);
279         alignedFree(p3, 16);
280     }
281 }
282 
283 
284 
285 /// Used throughout dplug:dsp to avoid reliance on GC.
286 /// Important: Size 0 is special-case to free the slice.
287 /// This works a bit like alignedRealloc except with slices as input.
288 /// You MUST use consistent alignement thoughout the lifetime of this buffer.
289 ///
290 /// Params:
291 ///    buffer = Existing allocated buffer. Can be null. 
292 ///             Input slice length is not considered.
293 ///    length = Desired slice length.
294 ///    alignment = Alignement if the slice has allocation requirements, 1 else. 
295 ///                Must match for deallocation.
296 ///
297 /// Example:
298 /// ---
299 /// import std.stdio;
300 ///
301 /// struct MyDSP
302 /// {
303 /// nothrow @nogc:
304 ///
305 ///     void initialize(int maxFrames)
306 ///     {
307 ///         // mybuf points to maxFrames frames
308 ///         mybuf.reallocBuffer(maxFrames);
309 ///     }   
310 ///
311 ///     ~this()
312 ///     {
313 ///         // If you don't free the buffer, it will leak.    
314 ///         mybuf.reallocBuffer(0); 
315 ///     }
316 ///
317 /// private:
318 ///     float[] mybuf;      
319 /// }
320 /// ---
321 void reallocBuffer(T)(ref T[] buffer, size_t length, int alignment = 1) nothrow @nogc
322 {
323     static if (is(T == struct) && hasElaborateDestructor!T)
324     {
325         static assert(false); // struct with destructors not supported
326     }
327 
328     /// Size 0 is special-case to free the slice.
329     if (length == 0)
330     {
331         alignedFree(buffer.ptr, alignment);
332         buffer = null;
333         return;
334     }
335 
336     T* pointer = cast(T*) alignedRealloc(buffer.ptr, T.sizeof * length, alignment);
337     if (pointer is null)
338         buffer = null; // alignment 1 can still return null
339     else
340         buffer = pointer[0..length];
341 }
342 unittest
343 {
344     int[] arr;
345     arr.reallocBuffer(15);
346     assert(arr.length == 15);
347     arr.reallocBuffer(0);
348     assert(arr.length == 0);
349 }
350 
351 
352 /// Returns: A newly created `Vec`.
353 Vec!T makeVec(T)(size_t initialSize = 0, int alignment = 1) nothrow @nogc
354 {
355     return Vec!T(initialSize, alignment);
356 }
357 
358 /// Kind of a std::vector replacement.
359 /// Grow-only array, points to a (optionally aligned) memory location.
360 /// This can also work as an output range.
361 /// `Vec` is designed to work even when uninitialized, without `makeVec`.
362 /// Warning: it is pretty barebones, doesn't respect T.init or call destructors.
363 ///          When used in a GC program, GC roots won't be registered.
364 struct Vec(T)
365 {
366 nothrow:
367 @nogc:
368 
369     static if (is(T == struct) && hasElaborateDestructor!T)
370     {
371         pragma(msg, "WARNING! struct with destructors were never meant to be supported in Vec!T. This will be removed in Dplug v15.");
372     }
373 
374     public
375     {
376         /// Creates an aligned buffer with given initial size.
377         this(size_t initialSize, int alignment) @safe
378         {
379             assert(alignment != 0);
380             _size = 0;
381             _allocated = 0;
382             _data = null;
383             _alignment = alignment;
384             resizeExactly(initialSize);
385         }
386 
387         ~this() @trusted
388         {
389             if (_data !is null)
390             {
391                 alignedFree(_data, _alignment);
392                 _data = null;
393                 _allocated = 0;
394             }
395         }
396 
397         @disable this(this);
398 
399         /// Returns: Length of buffer in elements.
400         size_t length() pure const @safe
401         {
402             return _size;
403         }
404         ///ditto
405         alias size = length;
406 
407         /// Returns: Length of buffer in elements, as signed.
408         ptrdiff_t slength() pure const @safe
409         {
410             return _size;
411         }
412         ///ditto
413         alias ssize = length;
414 
415         /// Returns: `true` if length is zero.
416         bool isEmpty() pure const @safe
417         {
418             return _size == 0;
419         }
420 
421         /// Returns: Allocated size of the underlying array.
422         size_t capacity() pure const @safe
423         {
424             return _allocated;
425         }
426 
427         /// Returns: Length of buffer in elements.
428         alias opDollar = length;
429 
430         /// Resizes a buffer to hold $(D askedSize) elements.
431         void resize(size_t askedSize) @trusted
432         {
433             resizeExactly(askedSize);
434         }
435 
436         /// Pop last element
437         T popBack() @trusted
438         {
439             assert(_size > 0);
440             _size = _size - 1;
441             return _data[_size];
442         }
443 
444         /// Append an element to this buffer.
445         void pushBack(T x) @trusted
446         {
447             size_t i = _size;
448             resizeGrow(_size + 1);
449             _data[i] = x;
450         }
451 
452         // DMD 2.088 deprecates the old D1-operators
453         static if (__VERSION__ >= 2088)
454         {
455             ///ditto
456             void opOpAssign(string op)(T x) @safe if (op == "~")
457             {
458                 pushBack(x);
459             }
460         }
461         else
462         {
463             ///ditto
464             void opCatAssign(T x) @safe
465             {
466                 pushBack(x);
467             }
468         }
469 
470         // Output range support
471         alias put = pushBack;
472 
473         /// Finds an item, returns -1 if not found
474         int indexOf(T x) @trusted
475         {
476             enum bool isStaticArray(T) = __traits(isStaticArray, T);
477 
478             static if (isStaticArray!T)
479             {
480                 // static array would be compared by identity as slice, which is not what we want.
481                 foreach(int i; 0..cast(int)_size)
482                     if (_data[i] == x)
483                         return i;
484             }
485             else
486             {
487                 // base types: identity is equality
488                 // reference types: looking for identity
489                 foreach(int i; 0..cast(int)_size)
490                     if (_data[i] is x)
491                         return i;
492             }
493             return -1;
494         }
495 
496         /// Removes an item and replaces it by the last item.
497         /// Warning: this reorders the array.
498         void removeAndReplaceByLastElement(size_t index) @trusted
499         {
500             assert(index < _size);
501             _data[index] = _data[--_size];
502         }
503 
504         /// Removes an item and shift the rest of the array to front by 1.
505         /// Warning: O(N) complexity.
506         void removeAndShiftRestOfArray(size_t index) @trusted
507         {
508             assert(index < _size);
509             for (; index + 1 < _size; ++index)
510                 _data[index] = _data[index+1];
511             --_size;
512         }
513 
514         /// Removes a range of items and shift the rest of the array to front.
515         /// Warning: O(N) complexity.
516         void removeAndShiftRestOfArray(size_t first, size_t last) @trusted
517         {
518             assert(first <= last && first <= _size && (last <= _size));
519             size_t remain = _size - last;
520             for (size_t n = 0; n < remain; ++n)
521                 _data[first + n] = _data[last + n];
522             _size -= (last - first);
523         }
524 
525         /// Appends another buffer to this buffer.
526         void pushBack(ref Vec other) @trusted
527         {
528             size_t oldSize = _size;
529             resizeGrow(_size + other._size);
530             memmove(_data + oldSize, other._data, T.sizeof * other._size);
531         }
532 
533         /// Appends a slice to this buffer.
534         /// `slice` should not belong to the same buffer _data.
535         void pushBack(T[] slice) @trusted
536         {
537             size_t oldSize = _size;
538             size_t newSize = _size + slice.length;
539             resizeGrow(newSize);
540             for (size_t n = 0; n < slice.length; ++n)
541                 _data[oldSize + n] = slice[n];
542         }
543 
544         /// Returns: Raw pointer to data.
545         @property inout(T)* ptr() inout @system
546         {
547             return _data;
548         }
549 
550         /// Returns: n-th element.
551         ref inout(T) opIndex(size_t i) pure inout @trusted
552         {
553             return _data[i];
554         }
555 
556         T opIndexAssign(T x, size_t i) pure @trusted
557         {
558             return _data[i] = x;
559         }
560 
561         /// Sets size to zero, but keeps allocated buffers.
562         void clearContents() pure @safe
563         {
564             _size = 0;
565         }
566 
567         /// Returns: Whole content of the array in one slice.
568         inout(T)[] opSlice() inout @safe
569         {
570             return opSlice(0, length());
571         }
572 
573         /// Returns: A slice of the array.
574         inout(T)[] opSlice(size_t i1, size_t i2) inout @trusted
575         {
576             return _data[i1 .. i2];
577         }
578 
579         /// Fills the buffer with the same value.
580         void fill(T x) @trusted
581         {
582             _data[0.._size] = x;
583         }
584 
585         /// Move. Give up owner ship of the data.
586         T[] releaseData() @system
587         {
588             T[] data = _data[0.._size];
589             assert(_alignment == 1); // else would need to be freed with alignedFree.
590             this._data = null;
591             this._size = 0;
592             this._allocated = 0;
593             this._alignment = 0;
594             return data;
595         }
596     }
597 
598     private
599     {
600         size_t _size = 0;
601         T* _data = null;
602         size_t _allocated = 0;
603         size_t _alignment = 1; // for an unaligned Vec, you probably are not interested in alignment
604 
605         /// Used internally to grow in response to a pushBack operation.
606         /// Different heuristic, since we know that the resize is likely to be repeated for an 
607         /// increasing size later.
608         void resizeGrow(size_t askedSize) @trusted
609         {
610             if (_allocated < askedSize)
611             {
612 
613                 version(all)
614                 {
615                     size_t newCap = computeNewCapacity(askedSize, _size);
616                     setCapacity(newCap);
617                 }
618                 else
619                 {
620                     setCapacity(2 * askedSize);
621                 }
622             }
623             _size = askedSize;
624         }
625 
626         // Resizes the `Vector` to hold exactly `askedSize` elements.
627         // Still if the allocated capacity is larger, do nothing.
628         void resizeExactly(size_t askedSize) @trusted
629         {
630             if (_allocated < askedSize)
631             {
632                 setCapacity(askedSize);
633             }
634             _size = askedSize;
635         }
636 
637         // Internal use, realloc internal buffer, copy existing items.
638         // Doesn't initialize the new ones.
639         void setCapacity(size_t cap)
640         {
641             size_t numBytes = cap * T.sizeof;
642             _data = cast(T*)(alignedRealloc(_data, numBytes, _alignment));
643             _allocated = cap;
644         }
645 
646         // Compute new capacity, while growing.
647         size_t computeNewCapacity(size_t newLength, size_t oldLength)
648         {
649             // Optimal value (Windows malloc) not far from there.
650             enum size_t PAGESIZE = 4096; 
651 
652             size_t newLengthBytes = newLength * T.sizeof;
653             if (newLengthBytes > PAGESIZE)
654             {
655                 // Larger arrays need a smaller growth factor to avoid wasting too much bytes.
656                 // This was found when tracing curve, not too far from golden ratio for some reason.
657                 return newLength + newLength / 2 + newLength / 8;
658             }
659             else
660             {
661                 // For smaller arrays being pushBack, can bring welcome speed by minimizing realloc.
662                 return newLength * 3;
663             }            
664         }
665     }
666 }
667 
668 unittest
669 {
670     import std.range.primitives;
671     static assert(isOutputRange!(Vec!ubyte, ubyte));
672 
673 
674     import std.random;
675     int NBUF = 200;
676 
677     Xorshift32 rng;
678     rng.seed(0xBAADC0DE);
679 
680     struct box2i { int a, b, c, d; }
681     Vec!box2i[] boxes;
682     boxes.length = NBUF;
683 
684     foreach(i; 0..NBUF)
685     {
686         boxes[i] = makeVec!box2i();
687     }
688 
689     foreach(j; 0..200)
690     {
691         foreach(i; 0..NBUF)
692         {
693             int previousSize = cast(int)(boxes[i].length);
694             void* previousPtr = boxes[i].ptr;
695             foreach(int k; 0..cast(int)(boxes[i].length))
696                 boxes[i][k] = box2i(k, k, k, k);
697 
698             int newSize = uniform(0, 100, rng);
699             boxes[i].resize(newSize);
700 
701             int minSize = previousSize;
702             if (minSize > boxes[i].length)
703                 minSize = cast(int)(boxes[i].length);
704 
705             void* newPtr = boxes[i].ptr;
706             foreach(int k; 0..minSize)
707             {
708                 box2i item = boxes[i][k];
709                 box2i shouldBe = box2i(k, k, k, k);
710                 assert(item == shouldBe);
711             }
712 
713             int sum = 0;
714             foreach(k; 0..newSize)
715             {
716                 box2i bb = boxes[i][k];
717                 sum += bb.a + bb.b + bb.c + bb.d;
718             }
719         }
720     }
721 
722     foreach(i; 0..NBUF)
723         boxes[i].destroy();
724 
725     {
726         auto buf = makeVec!int;
727         enum N = 10;
728         buf.resize(N);
729         foreach(i ; 0..N)
730             buf[i] = i;
731 
732         foreach(i ; 0..N)
733             assert(buf[i] == i);
734 
735         auto buf2 = makeVec!int;
736         buf2.pushBack(11);
737         buf2.pushBack(14);
738 
739         // test pushBack(slice)
740         buf.clearContents();
741         buf.pushBack(buf2[]);
742         assert(buf[0] == 11);
743         assert(buf[1] == 14);
744 
745         // test pushBack(slice)
746         buf2[1] = 8;
747         buf.clearContents();
748         buf.pushBack(buf2);
749         assert(buf[0] == 11);
750         assert(buf[1] == 8);
751     }
752 }
753 
754 // Vec should work without any initialization
755 unittest
756 {
757     Vec!string vec;
758 
759     foreach(e; vec[])
760     {        
761     }
762 
763     assert(vec.length == 0);
764     assert(vec.size == 0);
765     assert(vec.slength == 0);
766     assert(vec.ssize == 0);
767     vec.clearContents();
768     vec.resize(0);
769     assert(vec == vec.init);
770     vec.fill("filler");
771     assert(vec.ptr is null);
772 }
773 
774 // Issue #312: vec.opIndex not returning ref which break struct assignment
775 unittest
776 {
777     static struct A
778     {
779         int x;
780     }
781     Vec!A vec = makeVec!A();
782     A a;
783     vec.pushBack(a);
784     vec ~= a;
785     vec[0].x = 42; // vec[0] needs to return a ref
786     assert(vec[0].x == 42);
787 }
788 unittest // removeAndShiftRestOfArray was wrong
789 {
790     Vec!int v;
791     v.pushBack(14);
792     v.pushBack(27);
793     v.pushBack(38);
794     assert(v.length == 3);
795     v.removeAndShiftRestOfArray(1);
796     assert(v.length == 2);    
797     assert(v[] == [14, 38]);
798 }
799 unittest // removeAndShiftRestOfArray with slice
800 {
801     Vec!int v;
802     v.pushBack([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
803     v.removeAndShiftRestOfArray(3, 5);
804     assert(v[] == [0, 1, 2, 5, 6, 7, 8, 9]);
805     v.removeAndShiftRestOfArray(0, 4);
806     assert(v[] == [6, 7, 8, 9]);
807     v.removeAndShiftRestOfArray(2, 4);
808     assert(v[] == [6, 7]);
809     v.removeAndShiftRestOfArray(2, 2);
810     v.removeAndShiftRestOfArray(0, 0);
811     v.removeAndShiftRestOfArray(1, 1);
812     assert(v[] == [6, 7]);
813 }
814 
815 
816 
817 /// Allows to merge the allocation of several arrays, which saves allocation count and can speed up things thanks to locality.
818 ///
819 /// Example: see below unittest.
820 struct MergedAllocation
821 {
822 nothrow:
823 @nogc:
824 
825     // In debug mode, add stomp detection to `MergedAllocation`.
826     // This takes additional complexity to coarse check for stomping nearby buffers.
827     debug
828     {
829         private enum mergedAllocStompWarning = true;
830     }
831     else
832     {
833         private enum mergedAllocStompWarning = false;
834     }
835 
836 
837     // This adds 32-byte of sentinels around each allocation,
838     // and check at the end of the program that they were unaffected.
839     static if (mergedAllocStompWarning)
840     {
841         // Number of bytes to write between areas to check for stomping. 
842         enum SENTINEL_BYTES = 32; 
843     }
844 
845     enum maxExpectedAlignment = 32;
846 
847     /// Start defining the area of allocations.
848     void start()
849     {
850         // Detect memory errors in former uses.
851         static if (mergedAllocStompWarning)
852         {
853             checkSentinelAreas();
854             _allocateWasCalled = false;
855         }
856 
857         _base = cast(ubyte*)(cast(size_t)0);
858     }
859 
860     /// Allocate (or count) space needed for `numElems` elements of type `T` with given alignment.
861     /// This gets called twice for each array, see example for usage.
862     ///
863     /// This bumps the internal bump allocator.
864     /// Giving null to this chain and converting the result to size_t give the total needed size 
865     /// for the merged allocation.
866     ///
867     /// Warning: 
868     ///          - If called after a `start()` call, the area returned are wrong and are only for 
869     ///             counting needed bytes. Don't use those.
870     ///
871     ///          - If called after an `allocate()` call, the area returned are an actual merged 
872     ///            allocation (if the same calls are done).
873     ///
874     /// Warning: Prefer `allocArray` over `alloc` variant, since the extra length field WILL help 
875     ///          you catch memory errors before release. Else it is very common to miss buffer 
876     ///          overflows in samplerate changes.
877     void allocArray(T)(out T[] array, size_t numElems, size_t alignment = 1)
878     {
879         assert(alignment <= maxExpectedAlignment);
880         assert( (alignment != 0) && ((alignment & (alignment - 1)) == 0)); // power of two
881 
882         size_t adr = cast(size_t) _base;
883 
884         // 1. Align base address
885         size_t mask = ~(alignment - 1);
886         adr = (adr + alignment - 1) & mask;
887 
888         // 2. Assign array and base.
889         array = (cast(T*)adr)[0..numElems];
890         adr += T.sizeof * numElems;
891         _base = cast(ubyte*) adr;
892 
893         static if (mergedAllocStompWarning)
894         {
895             if (_allocateWasCalled && _allocation !is null)
896             {
897                 // Each allocated area followed with SENTINEL_BYTES bytes of value 0xCC
898                 _base[0..SENTINEL_BYTES] = 0xCC;
899                 registerSentinel(_base);
900             }
901             _base += SENTINEL_BYTES;
902         }
903     }
904 
905     ///ditto
906     void alloc(T)(out T* array, size_t numElems, size_t alignment = 1)
907     {
908         T[] arr;
909         allocArray(arr, numElems, alignment);
910         array = arr.ptr;
911     }
912 
913     /// Allocate actual storage for the merged allocation. From there, you need to define exactly the same area with `alloc` and `allocArray`.
914     /// This time they will get a proper value.
915     void allocate()
916     {
917         static if (mergedAllocStompWarning) _allocateWasCalled = true;
918 
919         size_t sizeNeeded =  cast(size_t)_base; // since it was fed 0 at start.
920 
921         if (sizeNeeded == 0)
922         {
923             // If no bytes are requested, it means no buffer were requested, or only with zero size.
924             // We will return a null pointer in that case, since accessing them would be illegal anyway.
925             _allocation = null;
926         }
927         else
928         {
929             // the merged allocation needs to have the largest expected alignment, else the size could depend on the hazards
930             // of the allocation. With maximum alignment, padding is the same so long as areas have smaller or equal alignment requirements.
931             _allocation = cast(ubyte*) _mm_realloc(_allocation, sizeNeeded, maxExpectedAlignment);
932         }
933 
934         // So that the next layout call points to the right area.
935         _base = _allocation;
936     }
937 
938     ~this()
939     {
940         static if (mergedAllocStompWarning)
941         {
942             checkSentinelAreas();
943         }
944 
945         if (_allocation != null)
946         {
947             _mm_free(_allocation);
948             _allocation = null;
949         }
950     }
951 
952 private:
953 
954     // Location of the allocation.
955     ubyte* _allocation = null;
956 
957     ///
958     ubyte* _base = null;
959 
960     static if (mergedAllocStompWarning)
961     {
962         bool _allocateWasCalled = false;
963 
964         Vec!(ubyte*) _sentinels; // start of sentinel area (SENTINAL_BYTES long)
965 
966         void registerSentinel(ubyte* start)
967         {
968             _sentinels.pushBack(start);
969         }
970 
971         bool hasMemoryError() // true if sentinel bytes stomped
972         {
973             assert(_allocateWasCalled && _allocation !is null);
974 
975             foreach(ubyte* s; _sentinels[])
976             {
977                 for (int n = 0; n < 32; ++n)
978                 {
979                     if (s[n] != 0xCC)
980                         return true;
981                 }
982             }
983             return false;
984         }
985 
986         // Check existing sentinels, and unregister them
987         void checkSentinelAreas()
988         {
989             if (!_allocateWasCalled)
990                 return; // still haven't done an allocation, nothing to check.
991 
992             if (_allocation is null)
993                 return; // nothing to check
994 
995             // If you fail here, there is a memory error in your access patterns.
996             // Sentinel bytes of value 0xCC were overwritten in a `MergedAllocation`.
997             // You can use slices with `allocArray` instead of `alloc` to find the faulty 
998             // access. This check doesn't catch everything!
999             assert(! hasMemoryError());
1000 
1001             _sentinels.clearContents();
1002         }
1003     }
1004 }
1005 
1006 
1007 // Here is how you should use MergedAllocation. 
1008 unittest
1009 {
1010     static struct MyDSPStruct
1011     {
1012     public:
1013     nothrow:
1014     @nogc:
1015         void initialize(int maxFrames)
1016         {
1017             _mergedAlloc.start();
1018             layout(_mergedAlloc, maxFrames); // you need such a layout function to be called twice.
1019             _mergedAlloc.allocate();
1020             layout(_mergedAlloc, maxFrames); // the first time arrays area allocated in the `null` area, the second time in
1021                                              // actually allocated memory (since we now have the needed length).
1022         }
1023     
1024         void layout(ref MergedAllocation ma, int maxFrames)
1025         {
1026             // allocate `maxFrames` elems, and return a slice in `_intermediateBuf`.
1027             ma.allocArray(_intermediateBuf, maxFrames); 
1028 
1029             // allocate `maxFrames` elems, aligned to 16-byte boundaries. Return a pointer to that in `_coeffs`.
1030             ma.alloc(_coeffs, maxFrames, 16);
1031         }
1032 
1033     private:
1034         float[] _intermediateBuf;
1035         double* _coeffs;
1036         MergedAllocation _mergedAlloc;
1037     }
1038 
1039     MyDSPStruct s;
1040     s.initialize(14);
1041     s._coeffs[0..14] = 1.0f;
1042     s._intermediateBuf[0..14] = 1.0f;
1043     s.initialize(17);
1044     s._coeffs[0..17] = 1.0f;
1045     s._intermediateBuf[0..17] = 1.0f;
1046 }
1047 
1048 // Should be valid to allocate nothing with a MergedAllocation.
1049 unittest
1050 {
1051     MergedAllocation ma;
1052     ma.start();
1053     ma.allocate();
1054     assert(ma._allocation == null);
1055 }
1056 
1057 // Should be valid to allocate nothing with a MergedAllocation.
1058 unittest
1059 {
1060     MergedAllocation ma;
1061     ma.start();
1062     ubyte[] arr, arr2;
1063     ma.allocArray(arr, 17);
1064     ma.allocArray(arr2, 24);
1065     assert(ma._allocation == null);
1066 
1067     ma.allocate();
1068     ma.allocArray(arr, 17);
1069     ma.allocArray(arr2, 24);
1070     assert(ma._allocation != null);
1071     assert(arr.length == 17);
1072     assert(arr2.length == 24);
1073 
1074     // Test memory error detection with a simple case.
1075     static if (MergedAllocation.mergedAllocStompWarning)
1076     {
1077         assert(!ma.hasMemoryError());
1078 
1079         // Create a memory error
1080         arr.ptr[18] = 2;
1081         assert(ma.hasMemoryError());
1082         arr.ptr[18] = 0xCC; // undo the error to avoid stopping the unittests
1083     }
1084 }