1 /**
2 Defines `Vec`, `reallocBuffer` and memory functions.
3 
4 Copyright: Guillaume Piolat 2015-2016.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 Authors:   Guillaume Piolat
7 */
8 module dplug.core.vec;
9 
10 import std.traits: hasElaborateDestructor;
11 
12 import core.stdc.stdlib: malloc, free, realloc;
13 import core.stdc.string: memcpy, memmove;
14 
15 import core.exception;
16 import inteli.xmmintrin;
17 
18 
19 // This module deals with aligned memory.
20 // You'll also find here a non-copyable std::vector equivalent `Vec`.
21 
22 /// Allocates an aligned memory chunk.
23 /// Functionally equivalent to Visual C++ _aligned_malloc.
24 /// Do not mix allocations with different alignment.
25 /// Important: `alignedMalloc(0)` does not necessarily return `null`, and its result 
26 ///            _has_ to be freed with `alignedFree`.
27 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc
28 {
29     assert(alignment != 0);
30 
31     // Short-cut and use the C allocator to avoid overhead if no alignment
32     if (alignment == 1)
33     {
34         // C99:
35         // Implementation-defined behavior
36         // Whether the calloc, malloc, and realloc functions return a null pointer
37         // or a pointer to an allocated object when the size requested is zero.
38         // In any case, we'll have to free() it.
39         return malloc(size);
40     }
41 
42     size_t request = requestedSize(size, alignment);
43     void* raw = malloc(request);
44 
45     if (request > 0 && raw == null) // malloc(0) can validly return anything
46         onOutOfMemoryError();
47 
48     return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment);
49 }
50 
51 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc.
52 /// Functionally equivalent to Visual C++ _aligned_free.
53 /// Do not mix allocations with different alignment.
54 void alignedFree(void* aligned, size_t alignment) nothrow @nogc
55 {
56     // Short-cut and use the C allocator to avoid overhead if no alignment
57     if (alignment == 1)
58         return free(aligned);
59 
60     // support for free(NULL)
61     if (aligned is null)
62         return;
63 
64     assert(alignment != 0);
65     assert(isPointerAligned(aligned, alignment));
66 
67     void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof);
68     free(*rawLocation);
69 }
70 
71 /// Reallocates an aligned memory chunk allocated by `alignedMalloc` or `alignedRealloc`.
72 /// Functionally equivalent to Visual C++ `_aligned_realloc`.
73 /// Do not mix allocations with different alignment.
74 /// Important: `alignedRealloc(p, 0)` does not necessarily return `null`, and its result 
75 ///            _has_ to be freed with `alignedFree`.
76 void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow @nogc
77 {
78     return alignedReallocImpl!true(aligned, size, alignment);
79 }
80 
81 
82 /// Same as `alignedRealloc` but does not preserve data.
83 void* alignedReallocDiscard(void* aligned, size_t size, size_t alignment) nothrow @nogc
84 {
85     return alignedReallocImpl!false(aligned, size, alignment);
86 }
87 
88 
89 /// Returns: `true` if the pointer is suitably aligned.
90 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc
91 {
92     assert(alignment != 0);
93     return ( cast(size_t)p & (alignment - 1) ) == 0;
94 }
95 unittest
96 {
97     ubyte b;
98     align(16) ubyte[5] c;
99     assert(isPointerAligned(&b, 1));
100     assert(!isPointerAligned(&c[1], 2));
101     assert(isPointerAligned(&c[4], 4));
102 }
103 
104 /// Does memory slices a[0..a_size] and b[0..b_size] have an overlapping byte?
105 bool isMemoryOverlapping(const(void)* a, ptrdiff_t a_size, 
106                          const(void)* b, ptrdiff_t b_size) pure @trusted
107 {
108     assert(a_size >= 0 && b_size >= 0);
109 
110     if (a is null || b is null)
111         return false;
112 
113     if (a_size == 0 || b_size == 0)
114         return false;
115 
116     ubyte* lA = cast(ubyte*)a;
117     ubyte* hA = lA + a_size;
118     ubyte* lB = cast(ubyte*)b;
119     ubyte* hB = lB + b_size;
120 
121     // There is overlapping, if lA is inside lB..hB, or lB is inside lA..hA
122 
123     if (lA >= lB && lA < hB)
124         return true;
125 
126     if (lB >= lA && lB < hA)
127         return true;
128 
129     return false;
130 }
131 bool isMemoryOverlapping(const(void)[] a, const(void)[] b) pure @trusted
132 {
133     return isMemoryOverlapping(a.ptr, a.length, b.ptr, b.length);
134 }
135 unittest
136 {
137     ubyte[100] a;
138     assert(!isMemoryOverlapping(null, a));
139     assert(!isMemoryOverlapping(a, null));
140     assert(!isMemoryOverlapping(a[1..1], a[0..10]));
141     assert(!isMemoryOverlapping(a[1..10], a[10..100]));
142     assert(!isMemoryOverlapping(a[30..100], a[0..30]));
143     assert(isMemoryOverlapping(a[1..50], a[49..100]));
144     assert(isMemoryOverlapping(a[49..100], a[1..50]));
145     assert(isMemoryOverlapping(a[40..45], a[30..55]));
146     assert(isMemoryOverlapping(a[30..55], a[40..45]));
147 }
148 
149 private nothrow @nogc
150 {
151     void* alignedReallocImpl(bool PreserveDataIfResized)(void* aligned, size_t size, size_t alignment)
152     {
153         // Short-cut and use the C allocator to avoid overhead if no alignment
154         if (alignment == 1)
155         {
156             // C99:
157             // Implementation-defined behavior
158             // Whether the calloc, malloc, and realloc functions return a null pointer
159             // or a pointer to an allocated object when the size requested is zero.
160             // In any case, we'll have to `free()` it.
161             return realloc(aligned, size);
162         }
163 
164         if (aligned is null)
165             return alignedMalloc(size, alignment);
166 
167         assert(alignment != 0);
168         assert(isPointerAligned(aligned, alignment));
169 
170         size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);
171 
172         void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
173         size_t request = requestedSize(size, alignment);
174         size_t previousRequest = requestedSize(previousSize, alignment);
175         assert(previousRequest - request == previousSize - size); // same alignment
176 
177         // Heuristic: if a requested size is within 50% to 100% of what is already allocated
178         //            then exit with the same pointer
179         if ( (previousRequest < request * 4) && (request <= previousRequest) )
180             return aligned;
181 
182         void* newRaw = malloc(request);
183         static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014
184         {
185             if (request > 0 && newRaw == null) // realloc(0) can validly return anything
186                 onOutOfMemoryError();
187         }
188 
189         void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, size, alignment);
190 
191         static if (PreserveDataIfResized)
192         {
193             size_t minSize = size < previousSize ? size : previousSize;
194             memcpy(newAligned, aligned, minSize); // memcpy OK
195         }
196 
197         // Free previous data
198         alignedFree(aligned, alignment);
199         assert(isPointerAligned(newAligned, alignment));
200         return newAligned;
201     }
202 
203     /// Returns: next pointer aligned with alignment bytes.
204     void* nextAlignedPointer(void* start, size_t alignment) pure
205     {
206         import dplug.core.math : nextMultipleOf;
207         return cast(void*)nextMultipleOf(cast(size_t)(start), alignment);
208     }
209 
210     // Returns number of bytes to actually allocate when asking
211     // for a particular alignement
212     size_t requestedSize(size_t askedSize, size_t alignment) pure
213     {
214         enum size_t pointerSize = size_t.sizeof;
215         return askedSize + alignment - 1 + pointerSize * 2;
216     }
217 
218     // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it)
219     void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment)
220     {
221         enum size_t pointerSize = size_t.sizeof;
222         char* start = cast(char*)raw + pointerSize * 2;
223         void* aligned = nextAlignedPointer(start, alignment);
224         void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
225         *rawLocation = raw;
226         size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
227         *sizeLocation = size;
228         assert( isPointerAligned(aligned, alignment) );
229         return aligned;
230     }
231 }
232 
233 unittest
234 {
235     {
236         void* p = alignedMalloc(23, 16);
237         assert(p !is null);
238         assert(((cast(size_t)p) & 0xf) == 0);
239 
240         alignedFree(p, 16);
241     }
242 
243     void* nullAlloc = alignedMalloc(0, 16);
244     assert(nullAlloc != null);
245     nullAlloc = alignedRealloc(nullAlloc, 0, 16);
246     assert(nullAlloc != null);
247     alignedFree(nullAlloc, 16);
248 
249     {
250         int alignment = 16;
251         int* p = null;
252 
253         // check if growing keep values in place
254         foreach(int i; 0..100)
255         {
256             p = cast(int*) alignedRealloc(p, (i + 1) * int.sizeof, alignment);
257             p[i] = i;
258         }
259 
260         foreach(int i; 0..100)
261             assert(p[i] == i);
262 
263         p = cast(int*) alignedRealloc(p, 0, alignment);
264         assert(p !is null);
265 
266         alignedFree(p, alignment);
267     }
268 
269     // Verify that same size alloc preserve pointer. 
270     {
271         void* p = null;
272         p = alignedRealloc(p, 254, 16);
273         void* p2 = alignedRealloc(p, 254, 16);
274         assert(p == p2);
275 
276         // Test shrink heuristic
277         void* p3 = alignedRealloc(p, 128, 16);
278         assert(p == p3);
279         alignedFree(p3, 16);
280     }
281 }
282 
283 
284 
285 /// Used throughout dplug:dsp to avoid reliance on GC.
286 /// Important: Size 0 is special-case to free the slice.
287 /// This works a bit like alignedRealloc except with slices as input.
288 /// You MUST use consistent alignement thoughout the lifetime of this buffer.
289 ///
290 /// Params:
291 ///    buffer = Existing allocated buffer. Can be null. 
292 ///             Input slice length is not considered.
293 ///    length = Desired slice length.
294 ///    alignment = Alignement if the slice has allocation requirements, 1 else. 
295 ///                Must match for deallocation.
296 ///
297 void reallocBuffer(T)(ref T[] buffer, size_t length, int alignment = 1) nothrow @nogc
298 {
299     static if (is(T == struct) && hasElaborateDestructor!T)
300     {
301         static assert(false); // struct with destructors not supported
302     }
303 
304     /// Size 0 is special-case to free the slice.
305     if (length == 0)
306     {
307         alignedFree(buffer.ptr, alignment);
308         buffer = null;
309         return;
310     }
311 
312     T* pointer = cast(T*) alignedRealloc(buffer.ptr, T.sizeof * length, alignment);
313     if (pointer is null)
314         buffer = null; // alignment 1 can still return null
315     else
316         buffer = pointer[0..length];
317 }
318 
319 
320 /// Returns: A newly created `Vec`.
321 Vec!T makeVec(T)(size_t initialSize = 0, int alignment = 1) nothrow @nogc
322 {
323     return Vec!T(initialSize, alignment);
324 }
325 
326 /// Kind of a std::vector replacement.
327 /// Grow-only array, points to a (optionally aligned) memory location.
328 /// This can also work as an output range.
329 /// `Vec` is designed to work even when uninitialized, without `makeVec`.
330 struct Vec(T)
331 {
332 nothrow:
333 @nogc:
334     public
335     {
336         /// Creates an aligned buffer with given initial size.
337         this(size_t initialSize, int alignment)
338         {
339             assert(alignment != 0);
340             _size = 0;
341             _allocated = 0;
342             _data = null;
343             _alignment = alignment;
344             resize(initialSize);
345         }
346 
347         ~this()
348         {
349             if (_data !is null)
350             {
351                 alignedFree(_data, _alignment);
352                 _data = null;
353                 _allocated = 0;
354             }
355         }
356 
357         @disable this(this);
358 
359         /// Returns: Length of buffer in elements.
360         size_t length() pure const
361         {
362             return _size;
363         }
364 
365         /// Returns: Length of buffer in elements.
366         alias opDollar = length;
367 
368         /// Resizes a buffer to hold $(D askedSize) elements.
369         void resize(size_t askedSize)
370         {
371             // grow only
372             if (_allocated < askedSize)
373             {
374                 size_t numBytes = askedSize * 2 * T.sizeof; // gives 2x what is asked to make room for growth
375                 _data = cast(T*)(alignedRealloc(_data, numBytes, _alignment));
376                 _allocated = askedSize * 2;
377             }
378             _size = askedSize;
379         }
380 
381         /// Pop last element
382         T popBack()
383         {
384             assert(_size > 0);
385             _size = _size - 1;
386             return _data[_size];
387         }
388 
389         /// Append an element to this buffer.
390         void pushBack(T x)
391         {
392             size_t i = _size;
393             resize(_size + 1);
394             _data[i] = x;
395         }
396 
397         // DMD 2.088 deprecates the old D1-operators
398         static if (__VERSION__ >= 2088)
399         {
400             ///ditto
401             void opOpAssign(string op)(T x) if (op == "~")
402             {
403                 pushBack(x);
404             }
405         }
406         else
407         {
408             ///ditto
409             void opCatAssign(T x)
410             {
411                 pushBack(x);
412             }
413         }
414 
415         // Output range support
416         alias put = pushBack;
417 
418         /// Finds an item, returns -1 if not found
419         int indexOf(T x)
420         {
421             enum bool isStaticArray(T) = __traits(isStaticArray, T);
422 
423             static if (isStaticArray!T)
424             {
425                 // static array would be compared by identity as slice, which is not what we want.
426                 foreach(int i; 0..cast(int)_size)
427                     if (_data[i] == x)
428                         return i;
429             }
430             else
431             {
432                 // base types: identity is equality
433                 // reference types: looking for identity
434                 foreach(int i; 0..cast(int)_size)
435                     if (_data[i] is x)
436                         return i;
437             }
438             return -1;
439         }
440 
441         /// Removes an item and replaces it by the last item.
442         /// Warning: this reorders the array.
443         void removeAndReplaceByLastElement(size_t index)
444         {
445             assert(index < _size);
446             _data[index] = _data[--_size];
447         }
448 
449         /// Removes an item and shift the rest of the array to front by 1.
450         /// Warning: O(N) complexity.
451         void removeAndShiftRestOfArray(size_t index)
452         {
453             assert(index < _size);
454             for (; index + 1 < _size; ++index)
455                 _data[index] = _data[index+1];
456         }
457 
458         /// Appends another buffer to this buffer.
459         void pushBack(ref Vec other)
460         {
461             size_t oldSize = _size;
462             resize(_size + other._size);
463             memmove(_data + oldSize, other._data, T.sizeof * other._size);
464         }
465 
466         /// Appends a slice to this buffer.
467         /// `slice` should not belong to the same buffer _data.
468         void pushBack(T[] slice)
469         {
470             size_t oldSize = _size;
471             size_t newSize = _size + slice.length;
472             resize(newSize);
473             for (size_t n = 0; n < slice.length; ++n)
474                 _data[oldSize + n] = slice[n];
475         }
476 
477         /// Returns: Raw pointer to data.
478         @property inout(T)* ptr() inout
479         {
480             return _data;
481         }
482 
483         /// Returns: n-th element.
484         ref inout(T) opIndex(size_t i) pure inout
485         {
486             return _data[i];
487         }
488 
489         T opIndexAssign(T x, size_t i)
490         {
491             return _data[i] = x;
492         }
493 
494         /// Sets size to zero, but keeps allocated buffers.
495         void clearContents()
496         {
497             _size = 0;
498         }
499 
500         /// Returns: Whole content of the array in one slice.
501         inout(T)[] opSlice() inout
502         {
503             return opSlice(0, length());
504         }
505 
506         /// Returns: A slice of the array.
507         inout(T)[] opSlice(size_t i1, size_t i2) inout
508         {
509             return _data[i1 .. i2];
510         }
511 
512         /// Fills the buffer with the same value.
513         void fill(T x)
514         {
515             _data[0.._size] = x;
516         }
517 
518         /// Move. Give up owner ship of the data.
519         T[] releaseData()
520         {
521             T[] data = _data[0.._size];
522             assert(_alignment == 1); // else would need to be freed with alignedFree.
523             this._data = null;
524             this._size = 0;
525             this._allocated = 0;
526             this._alignment = 0;
527             return data;
528         }
529     }
530 
531     private
532     {
533         size_t _size = 0;
534         T* _data = null;
535         size_t _allocated = 0;
536         size_t _alignment = 1; // for an unaligned Vec, you probably are not interested in alignment
537     }
538 }
539 
540 unittest
541 {
542     import std.range.primitives;
543     static assert(isOutputRange!(Vec!ubyte, ubyte));
544 
545 
546     import std.random;
547     import std.algorithm.comparison;
548     int NBUF = 200;
549 
550     Xorshift32 rng;
551     rng.seed(0xBAADC0DE);
552 
553     struct box2i { int a, b, c, d; }
554     Vec!box2i[] boxes;
555     boxes.length = NBUF;
556 
557     foreach(i; 0..NBUF)
558     {
559         boxes[i] = makeVec!box2i();
560     }
561 
562     foreach(j; 0..200)
563     {
564         foreach(i; 0..NBUF)
565         {
566             int previousSize = cast(int)(boxes[i].length);
567             void* previousPtr = boxes[i].ptr;
568             foreach(int k; 0..cast(int)(boxes[i].length))
569                 boxes[i][k] = box2i(k, k, k, k);
570 
571             int newSize = uniform(0, 100, rng);
572             boxes[i].resize(newSize);
573 
574             int minSize = min(previousSize, boxes[i].length);
575             void* newPtr = boxes[i].ptr;
576             foreach(int k; 0..minSize)
577             {
578                 box2i item = boxes[i][k];
579                 box2i shouldBe = box2i(k, k, k, k);
580                 assert(item == shouldBe);
581             }
582 
583             int sum = 0;
584             foreach(k; 0..newSize)
585             {
586                 box2i bb = boxes[i][k];
587                 sum += bb.a + bb.b + bb.c + bb.d;
588             }
589         }
590     }
591 
592     foreach(i; 0..NBUF)
593         boxes[i].destroy();
594 
595     {
596         auto buf = makeVec!int;
597         enum N = 10;
598         buf.resize(N);
599         foreach(i ; 0..N)
600             buf[i] = i;
601 
602         foreach(i ; 0..N)
603             assert(buf[i] == i);
604 
605         auto buf2 = makeVec!int;
606         buf2.pushBack(11);
607         buf2.pushBack(14);
608 
609         // test pushBack(slice)
610         buf.clearContents();
611         buf.pushBack(buf2[]);
612         assert(buf[0] == 11);
613         assert(buf[1] == 14);
614 
615         // test pushBack(slice)
616         buf2[1] = 8;
617         buf.clearContents();
618         buf.pushBack(buf2);
619         assert(buf[0] == 11);
620         assert(buf[1] == 8);
621     }
622 }
623 
624 
625 // Vec should work without any initialization
626 unittest
627 {
628     Vec!string vec;
629 
630     foreach(e; vec[])
631     {        
632     }
633 
634     assert(vec.length == 0);
635     vec.clearContents();
636     vec.resize(0);
637     assert(vec == vec.init);
638     vec.fill("filler");
639     assert(vec.ptr is null);
640 }
641 
642 // Issue #312: vec.opIndex not returning ref which break struct assignment
643 unittest
644 {
645     static struct A
646     {
647         int x;
648     }
649     Vec!A vec = makeVec!A();
650     A a;
651     vec.pushBack(a);
652     vec ~= a;
653     vec[0].x = 42; // vec[0] needs to return a ref
654     assert(vec[0].x == 42);
655 }
656 
657 /// Allows to merge the allocation of several arrays, which saves allocation count and can speed up things thanks to locality.
658 ///
659 /// Example: see below unittest.
660 struct MergedAllocation
661 {
662 nothrow:
663 @nogc:
664 
665     enum maxExpectedAlignment = 32;
666 
667     /// Start defining the area of allocations.
668     void start()
669     {
670         _base = cast(ubyte*)(cast(size_t)0);
671     }
672 
673     /// Given pointer `base`, `array` gets an alignement area with `numElems` T elements and a given alignment.
674     /// `base` gets incremented to point to just after that area.
675     /// This is usful to create merged allocations with a chain of `mergedAllocArea`.
676     /// Giving null to this chain and converting the result to size_t give the total needed size for the merged allocation.
677     /// Warning: if called after a `start()` call, the area returned are wrong and are only for counting needed bytes.
678     ///          if called after an `allocate()` call, the area returned are right (if the same calls are done).
679     void allocArray(T)(out T[] array, size_t numElems, size_t alignment = 1)
680     {
681         assert(alignment <= maxExpectedAlignment);
682         assert( (alignment != 0) && ((alignment & (alignment - 1)) == 0)); // power of two
683 
684         size_t adr = cast(size_t) _base;
685 
686         // 1. Align base address
687         size_t mask = ~(alignment - 1);
688         adr = (adr + alignment - 1) & mask;
689 
690         // 2. Assign array and base.
691         array = (cast(T*)adr)[0..numElems];
692         adr += T.sizeof * numElems;
693         _base = cast(ubyte*) adr;
694     }
695 
696     ///ditto
697     void alloc(T)(out T* array, size_t numElems, size_t alignment = 1)
698     {
699         T[] arr;
700         allocArray(arr, numElems, alignment);
701         array = arr.ptr;
702     }
703 
704     /// Allocate actual storage for the merged allocation. From there, you need to define exactly the same area with `alloc` and `allocArray`.
705     /// This time they will get a proper value.
706     void allocate()
707     {
708         size_t sizeNeeded =  cast(size_t)_base; // since it was fed 0 at start.
709 
710         // the merged allocation needs to have the largest expected alignment, else the size could depend on the hazards
711         // of the allocation. With maximum alignment, padding is the same so long as areas have smaller or equal alignment requirements.
712         _allocation = cast(ubyte*) _mm_realloc(_allocation, sizeNeeded, maxExpectedAlignment);
713 
714         // So that the next layout call points to the right area.
715         _base = _allocation;
716     }
717 
718     ~this()
719     {
720         _allocation = cast(ubyte*) _mm_realloc(_allocation, 0, maxExpectedAlignment);
721     }
722 
723 
724 private:
725 
726     // Location of the allocation.
727     ubyte* _allocation = null;
728 
729     ///
730     ubyte* _base = null;
731 }
732 
733 unittest
734 {
735     static struct MyDSPStruct
736     {
737     public:
738     nothrow:
739     @nogc:
740         void initialize(int maxFrames)
741         {
742             _mergedAlloc.start();
743             layout(_mergedAlloc, maxFrames); // you need such a layout function to be called twice.
744             _mergedAlloc.allocate();
745             layout(_mergedAlloc, maxFrames); // the first time arrays area allocated in the `null` area, the second time in
746                                              // actually allocated memory (since we now have the needed length).
747         }
748     
749         void layout(ref MergedAllocation ma, int maxFrames)
750         {
751             // allocate `maxFrames` elems, and return a slice in `_intermediateBuf`.
752             ma.allocArray(_intermediateBuf, maxFrames); 
753 
754             // allocate `maxFrames` elems, aligned to 16-byte boundaries. Return a pointer to that in `_coeffs`.
755             ma.alloc(_coeffs, maxFrames, 16);
756         }
757 
758     private:
759         float[] _intermediateBuf;
760         double* _coeffs;
761         MergedAllocation _mergedAlloc;
762     }
763 
764     MyDSPStruct s;
765     s.initialize(14);
766     s._coeffs[0..14] = 1.0f;
767     s._intermediateBuf[0..14] = 1.0f;
768     s.initialize(17);
769     s._coeffs[0..17] = 1.0f;
770     s._intermediateBuf[0..17] = 1.0f;
771 }