1 /** 2 Defines `Vec`, `reallocBuffer` and memory functions. 3 4 Copyright: Guillaume Piolat 2015-2016. 5 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 Authors: Guillaume Piolat 7 */ 8 module dplug.core.vec; 9 10 import std.traits: hasElaborateDestructor; 11 12 import core.stdc.stdlib: malloc, free, realloc; 13 import core.stdc.string: memcpy, memmove; 14 15 import core.exception; 16 import inteli.xmmintrin; 17 18 19 // This module deals with aligned memory. 20 // You'll also find here a non-copyable std::vector equivalent `Vec`. 21 22 /// Allocates an aligned memory chunk. 23 /// Functionally equivalent to Visual C++ _aligned_malloc. 24 /// Do not mix allocations with different alignment. 25 /// Important: `alignedMalloc(0)` does not necessarily return `null`, and its result 26 /// _has_ to be freed with `alignedFree`. 27 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc 28 { 29 assert(alignment != 0); 30 31 // Short-cut and use the C allocator to avoid overhead if no alignment 32 if (alignment == 1) 33 { 34 // C99: 35 // Implementation-defined behavior 36 // Whether the calloc, malloc, and realloc functions return a null pointer 37 // or a pointer to an allocated object when the size requested is zero. 38 // In any case, we'll have to free() it. 39 return malloc(size); 40 } 41 42 size_t request = requestedSize(size, alignment); 43 void* raw = malloc(request); 44 45 if (request > 0 && raw == null) // malloc(0) can validly return anything 46 onOutOfMemoryError(); 47 48 return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment); 49 } 50 51 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc. 52 /// Functionally equivalent to Visual C++ _aligned_free. 53 /// Do not mix allocations with different alignment. 54 void alignedFree(void* aligned, size_t alignment) nothrow @nogc 55 { 56 // Short-cut and use the C allocator to avoid overhead if no alignment 57 if (alignment == 1) 58 return free(aligned); 59 60 // support for free(NULL) 61 if (aligned is null) 62 return; 63 64 assert(alignment != 0); 65 assert(isPointerAligned(aligned, alignment)); 66 67 void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof); 68 free(*rawLocation); 69 } 70 71 /// Reallocates an aligned memory chunk allocated by `alignedMalloc` or `alignedRealloc`. 72 /// Functionally equivalent to Visual C++ `_aligned_realloc`. 73 /// Do not mix allocations with different alignment. 74 /// Important: `alignedRealloc(p, 0)` does not necessarily return `null`, and its result 75 /// _has_ to be freed with `alignedFree`. 76 void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow @nogc 77 { 78 return alignedReallocImpl!true(aligned, size, alignment); 79 } 80 81 82 /// Same as `alignedRealloc` but does not preserve data. 83 void* alignedReallocDiscard(void* aligned, size_t size, size_t alignment) nothrow @nogc 84 { 85 return alignedReallocImpl!false(aligned, size, alignment); 86 } 87 88 89 /// Returns: `true` if the pointer is suitably aligned. 90 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc 91 { 92 assert(alignment != 0); 93 return ( cast(size_t)p & (alignment - 1) ) == 0; 94 } 95 unittest 96 { 97 ubyte b; 98 align(16) ubyte[5] c; 99 assert(isPointerAligned(&b, 1)); 100 assert(!isPointerAligned(&c[1], 2)); 101 assert(isPointerAligned(&c[4], 4)); 102 } 103 104 /// Does memory slices a[0..a_size] and b[0..b_size] have an overlapping byte? 105 bool isMemoryOverlapping(const(void)* a, ptrdiff_t a_size, 106 const(void)* b, ptrdiff_t b_size) pure @trusted 107 { 108 assert(a_size >= 0 && b_size >= 0); 109 110 if (a is null || b is null) 111 return false; 112 113 if (a_size == 0 || b_size == 0) 114 return false; 115 116 ubyte* lA = cast(ubyte*)a; 117 ubyte* hA = lA + a_size; 118 ubyte* lB = cast(ubyte*)b; 119 ubyte* hB = lB + b_size; 120 121 // There is overlapping, if lA is inside lB..hB, or lB is inside lA..hA 122 123 if (lA >= lB && lA < hB) 124 return true; 125 126 if (lB >= lA && lB < hA) 127 return true; 128 129 return false; 130 } 131 bool isMemoryOverlapping(const(void)[] a, const(void)[] b) pure @trusted 132 { 133 return isMemoryOverlapping(a.ptr, a.length, b.ptr, b.length); 134 } 135 unittest 136 { 137 ubyte[100] a; 138 assert(!isMemoryOverlapping(null, a)); 139 assert(!isMemoryOverlapping(a, null)); 140 assert(!isMemoryOverlapping(a[1..1], a[0..10])); 141 assert(!isMemoryOverlapping(a[1..10], a[10..100])); 142 assert(!isMemoryOverlapping(a[30..100], a[0..30])); 143 assert(isMemoryOverlapping(a[1..50], a[49..100])); 144 assert(isMemoryOverlapping(a[49..100], a[1..50])); 145 assert(isMemoryOverlapping(a[40..45], a[30..55])); 146 assert(isMemoryOverlapping(a[30..55], a[40..45])); 147 } 148 149 private nothrow @nogc 150 { 151 void* alignedReallocImpl(bool PreserveDataIfResized)(void* aligned, size_t size, size_t alignment) 152 { 153 // Short-cut and use the C allocator to avoid overhead if no alignment 154 if (alignment == 1) 155 { 156 // C99: 157 // Implementation-defined behavior 158 // Whether the calloc, malloc, and realloc functions return a null pointer 159 // or a pointer to an allocated object when the size requested is zero. 160 // In any case, we'll have to `free()` it. 161 return realloc(aligned, size); 162 } 163 164 if (aligned is null) 165 return alignedMalloc(size, alignment); 166 167 assert(alignment != 0); 168 assert(isPointerAligned(aligned, alignment)); 169 170 size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2); 171 172 void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof); 173 size_t request = requestedSize(size, alignment); 174 size_t previousRequest = requestedSize(previousSize, alignment); 175 assert(previousRequest - request == previousSize - size); // same alignment 176 177 // Heuristic: if a requested size is within 50% to 100% of what is already allocated 178 // then exit with the same pointer 179 if ( (previousRequest < request * 4) && (request <= previousRequest) ) 180 return aligned; 181 182 void* newRaw = malloc(request); 183 static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014 184 { 185 if (request > 0 && newRaw == null) // realloc(0) can validly return anything 186 onOutOfMemoryError(); 187 } 188 189 void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, size, alignment); 190 191 static if (PreserveDataIfResized) 192 { 193 size_t minSize = size < previousSize ? size : previousSize; 194 memcpy(newAligned, aligned, minSize); // memcpy OK 195 } 196 197 // Free previous data 198 alignedFree(aligned, alignment); 199 assert(isPointerAligned(newAligned, alignment)); 200 return newAligned; 201 } 202 203 /// Returns: next pointer aligned with alignment bytes. 204 void* nextAlignedPointer(void* start, size_t alignment) pure 205 { 206 import dplug.core.math : nextMultipleOf; 207 return cast(void*)nextMultipleOf(cast(size_t)(start), alignment); 208 } 209 210 // Returns number of bytes to actually allocate when asking 211 // for a particular alignement 212 size_t requestedSize(size_t askedSize, size_t alignment) pure 213 { 214 enum size_t pointerSize = size_t.sizeof; 215 return askedSize + alignment - 1 + pointerSize * 2; 216 } 217 218 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it) 219 void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment) 220 { 221 enum size_t pointerSize = size_t.sizeof; 222 char* start = cast(char*)raw + pointerSize * 2; 223 void* aligned = nextAlignedPointer(start, alignment); 224 void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize); 225 *rawLocation = raw; 226 size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize); 227 *sizeLocation = size; 228 assert( isPointerAligned(aligned, alignment) ); 229 return aligned; 230 } 231 } 232 233 unittest 234 { 235 { 236 void* p = alignedMalloc(23, 16); 237 assert(p !is null); 238 assert(((cast(size_t)p) & 0xf) == 0); 239 240 alignedFree(p, 16); 241 } 242 243 void* nullAlloc = alignedMalloc(0, 16); 244 assert(nullAlloc != null); 245 nullAlloc = alignedRealloc(nullAlloc, 0, 16); 246 assert(nullAlloc != null); 247 alignedFree(nullAlloc, 16); 248 249 { 250 int alignment = 16; 251 int* p = null; 252 253 // check if growing keep values in place 254 foreach(int i; 0..100) 255 { 256 p = cast(int*) alignedRealloc(p, (i + 1) * int.sizeof, alignment); 257 p[i] = i; 258 } 259 260 foreach(int i; 0..100) 261 assert(p[i] == i); 262 263 p = cast(int*) alignedRealloc(p, 0, alignment); 264 assert(p !is null); 265 266 alignedFree(p, alignment); 267 } 268 269 // Verify that same size alloc preserve pointer. 270 { 271 void* p = null; 272 p = alignedRealloc(p, 254, 16); 273 void* p2 = alignedRealloc(p, 254, 16); 274 assert(p == p2); 275 276 // Test shrink heuristic 277 void* p3 = alignedRealloc(p, 128, 16); 278 assert(p == p3); 279 alignedFree(p3, 16); 280 } 281 } 282 283 284 285 /// Used throughout dplug:dsp to avoid reliance on GC. 286 /// Important: Size 0 is special-case to free the slice. 287 /// This works a bit like alignedRealloc except with slices as input. 288 /// You MUST use consistent alignement thoughout the lifetime of this buffer. 289 /// 290 /// Params: 291 /// buffer = Existing allocated buffer. Can be null. 292 /// Input slice length is not considered. 293 /// length = Desired slice length. 294 /// alignment = Alignement if the slice has allocation requirements, 1 else. 295 /// Must match for deallocation. 296 /// 297 void reallocBuffer(T)(ref T[] buffer, size_t length, int alignment = 1) nothrow @nogc 298 { 299 static if (is(T == struct) && hasElaborateDestructor!T) 300 { 301 static assert(false); // struct with destructors not supported 302 } 303 304 /// Size 0 is special-case to free the slice. 305 if (length == 0) 306 { 307 alignedFree(buffer.ptr, alignment); 308 buffer = null; 309 return; 310 } 311 312 T* pointer = cast(T*) alignedRealloc(buffer.ptr, T.sizeof * length, alignment); 313 if (pointer is null) 314 buffer = null; // alignment 1 can still return null 315 else 316 buffer = pointer[0..length]; 317 } 318 319 320 /// Returns: A newly created `Vec`. 321 Vec!T makeVec(T)(size_t initialSize = 0, int alignment = 1) nothrow @nogc 322 { 323 return Vec!T(initialSize, alignment); 324 } 325 326 /// Kind of a std::vector replacement. 327 /// Grow-only array, points to a (optionally aligned) memory location. 328 /// This can also work as an output range. 329 /// `Vec` is designed to work even when uninitialized, without `makeVec`. 330 struct Vec(T) 331 { 332 nothrow: 333 @nogc: 334 public 335 { 336 /// Creates an aligned buffer with given initial size. 337 this(size_t initialSize, int alignment) 338 { 339 assert(alignment != 0); 340 _size = 0; 341 _allocated = 0; 342 _data = null; 343 _alignment = alignment; 344 resize(initialSize); 345 } 346 347 ~this() 348 { 349 if (_data !is null) 350 { 351 alignedFree(_data, _alignment); 352 _data = null; 353 _allocated = 0; 354 } 355 } 356 357 @disable this(this); 358 359 /// Returns: Length of buffer in elements. 360 size_t length() pure const 361 { 362 return _size; 363 } 364 365 /// Returns: Length of buffer in elements. 366 alias opDollar = length; 367 368 /// Resizes a buffer to hold $(D askedSize) elements. 369 void resize(size_t askedSize) 370 { 371 // grow only 372 if (_allocated < askedSize) 373 { 374 size_t numBytes = askedSize * 2 * T.sizeof; // gives 2x what is asked to make room for growth 375 _data = cast(T*)(alignedRealloc(_data, numBytes, _alignment)); 376 _allocated = askedSize * 2; 377 } 378 _size = askedSize; 379 } 380 381 /// Pop last element 382 T popBack() 383 { 384 assert(_size > 0); 385 _size = _size - 1; 386 return _data[_size]; 387 } 388 389 /// Append an element to this buffer. 390 void pushBack(T x) 391 { 392 size_t i = _size; 393 resize(_size + 1); 394 _data[i] = x; 395 } 396 397 // DMD 2.088 deprecates the old D1-operators 398 static if (__VERSION__ >= 2088) 399 { 400 ///ditto 401 void opOpAssign(string op)(T x) if (op == "~") 402 { 403 pushBack(x); 404 } 405 } 406 else 407 { 408 ///ditto 409 void opCatAssign(T x) 410 { 411 pushBack(x); 412 } 413 } 414 415 // Output range support 416 alias put = pushBack; 417 418 /// Finds an item, returns -1 if not found 419 int indexOf(T x) 420 { 421 enum bool isStaticArray(T) = __traits(isStaticArray, T); 422 423 static if (isStaticArray!T) 424 { 425 // static array would be compared by identity as slice, which is not what we want. 426 foreach(int i; 0..cast(int)_size) 427 if (_data[i] == x) 428 return i; 429 } 430 else 431 { 432 // base types: identity is equality 433 // reference types: looking for identity 434 foreach(int i; 0..cast(int)_size) 435 if (_data[i] is x) 436 return i; 437 } 438 return -1; 439 } 440 441 /// Removes an item and replaces it by the last item. 442 /// Warning: this reorders the array. 443 void removeAndReplaceByLastElement(size_t index) 444 { 445 assert(index < _size); 446 _data[index] = _data[--_size]; 447 } 448 449 /// Removes an item and shift the rest of the array to front by 1. 450 /// Warning: O(N) complexity. 451 void removeAndShiftRestOfArray(size_t index) 452 { 453 assert(index < _size); 454 for (; index + 1 < _size; ++index) 455 _data[index] = _data[index+1]; 456 } 457 458 /// Appends another buffer to this buffer. 459 void pushBack(ref Vec other) 460 { 461 size_t oldSize = _size; 462 resize(_size + other._size); 463 memmove(_data + oldSize, other._data, T.sizeof * other._size); 464 } 465 466 /// Appends a slice to this buffer. 467 /// `slice` should not belong to the same buffer _data. 468 void pushBack(T[] slice) 469 { 470 size_t oldSize = _size; 471 size_t newSize = _size + slice.length; 472 resize(newSize); 473 for (size_t n = 0; n < slice.length; ++n) 474 _data[oldSize + n] = slice[n]; 475 } 476 477 /// Returns: Raw pointer to data. 478 @property inout(T)* ptr() inout 479 { 480 return _data; 481 } 482 483 /// Returns: n-th element. 484 ref inout(T) opIndex(size_t i) pure inout 485 { 486 return _data[i]; 487 } 488 489 T opIndexAssign(T x, size_t i) 490 { 491 return _data[i] = x; 492 } 493 494 /// Sets size to zero, but keeps allocated buffers. 495 void clearContents() 496 { 497 _size = 0; 498 } 499 500 /// Returns: Whole content of the array in one slice. 501 inout(T)[] opSlice() inout 502 { 503 return opSlice(0, length()); 504 } 505 506 /// Returns: A slice of the array. 507 inout(T)[] opSlice(size_t i1, size_t i2) inout 508 { 509 return _data[i1 .. i2]; 510 } 511 512 /// Fills the buffer with the same value. 513 void fill(T x) 514 { 515 _data[0.._size] = x; 516 } 517 518 /// Move. Give up owner ship of the data. 519 T[] releaseData() 520 { 521 T[] data = _data[0.._size]; 522 assert(_alignment == 1); // else would need to be freed with alignedFree. 523 this._data = null; 524 this._size = 0; 525 this._allocated = 0; 526 this._alignment = 0; 527 return data; 528 } 529 } 530 531 private 532 { 533 size_t _size = 0; 534 T* _data = null; 535 size_t _allocated = 0; 536 size_t _alignment = 1; // for an unaligned Vec, you probably are not interested in alignment 537 } 538 } 539 540 unittest 541 { 542 import std.range.primitives; 543 static assert(isOutputRange!(Vec!ubyte, ubyte)); 544 545 546 import std.random; 547 import std.algorithm.comparison; 548 int NBUF = 200; 549 550 Xorshift32 rng; 551 rng.seed(0xBAADC0DE); 552 553 struct box2i { int a, b, c, d; } 554 Vec!box2i[] boxes; 555 boxes.length = NBUF; 556 557 foreach(i; 0..NBUF) 558 { 559 boxes[i] = makeVec!box2i(); 560 } 561 562 foreach(j; 0..200) 563 { 564 foreach(i; 0..NBUF) 565 { 566 int previousSize = cast(int)(boxes[i].length); 567 void* previousPtr = boxes[i].ptr; 568 foreach(int k; 0..cast(int)(boxes[i].length)) 569 boxes[i][k] = box2i(k, k, k, k); 570 571 int newSize = uniform(0, 100, rng); 572 boxes[i].resize(newSize); 573 574 int minSize = min(previousSize, boxes[i].length); 575 void* newPtr = boxes[i].ptr; 576 foreach(int k; 0..minSize) 577 { 578 box2i item = boxes[i][k]; 579 box2i shouldBe = box2i(k, k, k, k); 580 assert(item == shouldBe); 581 } 582 583 int sum = 0; 584 foreach(k; 0..newSize) 585 { 586 box2i bb = boxes[i][k]; 587 sum += bb.a + bb.b + bb.c + bb.d; 588 } 589 } 590 } 591 592 foreach(i; 0..NBUF) 593 boxes[i].destroy(); 594 595 { 596 auto buf = makeVec!int; 597 enum N = 10; 598 buf.resize(N); 599 foreach(i ; 0..N) 600 buf[i] = i; 601 602 foreach(i ; 0..N) 603 assert(buf[i] == i); 604 605 auto buf2 = makeVec!int; 606 buf2.pushBack(11); 607 buf2.pushBack(14); 608 609 // test pushBack(slice) 610 buf.clearContents(); 611 buf.pushBack(buf2[]); 612 assert(buf[0] == 11); 613 assert(buf[1] == 14); 614 615 // test pushBack(slice) 616 buf2[1] = 8; 617 buf.clearContents(); 618 buf.pushBack(buf2); 619 assert(buf[0] == 11); 620 assert(buf[1] == 8); 621 } 622 } 623 624 625 // Vec should work without any initialization 626 unittest 627 { 628 Vec!string vec; 629 630 foreach(e; vec[]) 631 { 632 } 633 634 assert(vec.length == 0); 635 vec.clearContents(); 636 vec.resize(0); 637 assert(vec == vec.init); 638 vec.fill("filler"); 639 assert(vec.ptr is null); 640 } 641 642 // Issue #312: vec.opIndex not returning ref which break struct assignment 643 unittest 644 { 645 static struct A 646 { 647 int x; 648 } 649 Vec!A vec = makeVec!A(); 650 A a; 651 vec.pushBack(a); 652 vec ~= a; 653 vec[0].x = 42; // vec[0] needs to return a ref 654 assert(vec[0].x == 42); 655 } 656 657 /// Allows to merge the allocation of several arrays, which saves allocation count and can speed up things thanks to locality. 658 /// 659 /// Example: see below unittest. 660 struct MergedAllocation 661 { 662 nothrow: 663 @nogc: 664 665 enum maxExpectedAlignment = 32; 666 667 /// Start defining the area of allocations. 668 void start() 669 { 670 _base = cast(ubyte*)(cast(size_t)0); 671 } 672 673 /// Given pointer `base`, `array` gets an alignement area with `numElems` T elements and a given alignment. 674 /// `base` gets incremented to point to just after that area. 675 /// This is usful to create merged allocations with a chain of `mergedAllocArea`. 676 /// Giving null to this chain and converting the result to size_t give the total needed size for the merged allocation. 677 /// Warning: if called after a `start()` call, the area returned are wrong and are only for counting needed bytes. 678 /// if called after an `allocate()` call, the area returned are right (if the same calls are done). 679 void allocArray(T)(out T[] array, size_t numElems, size_t alignment = 1) 680 { 681 assert(alignment <= maxExpectedAlignment); 682 assert( (alignment != 0) && ((alignment & (alignment - 1)) == 0)); // power of two 683 684 size_t adr = cast(size_t) _base; 685 686 // 1. Align base address 687 size_t mask = ~(alignment - 1); 688 adr = (adr + alignment - 1) & mask; 689 690 // 2. Assign array and base. 691 array = (cast(T*)adr)[0..numElems]; 692 adr += T.sizeof * numElems; 693 _base = cast(ubyte*) adr; 694 } 695 696 ///ditto 697 void alloc(T)(out T* array, size_t numElems, size_t alignment = 1) 698 { 699 T[] arr; 700 allocArray(arr, numElems, alignment); 701 array = arr.ptr; 702 } 703 704 /// Allocate actual storage for the merged allocation. From there, you need to define exactly the same area with `alloc` and `allocArray`. 705 /// This time they will get a proper value. 706 void allocate() 707 { 708 size_t sizeNeeded = cast(size_t)_base; // since it was fed 0 at start. 709 710 // the merged allocation needs to have the largest expected alignment, else the size could depend on the hazards 711 // of the allocation. With maximum alignment, padding is the same so long as areas have smaller or equal alignment requirements. 712 _allocation = cast(ubyte*) _mm_realloc(_allocation, sizeNeeded, maxExpectedAlignment); 713 714 // So that the next layout call points to the right area. 715 _base = _allocation; 716 } 717 718 ~this() 719 { 720 _allocation = cast(ubyte*) _mm_realloc(_allocation, 0, maxExpectedAlignment); 721 } 722 723 724 private: 725 726 // Location of the allocation. 727 ubyte* _allocation = null; 728 729 /// 730 ubyte* _base = null; 731 } 732 733 unittest 734 { 735 static struct MyDSPStruct 736 { 737 public: 738 nothrow: 739 @nogc: 740 void initialize(int maxFrames) 741 { 742 _mergedAlloc.start(); 743 layout(_mergedAlloc, maxFrames); // you need such a layout function to be called twice. 744 _mergedAlloc.allocate(); 745 layout(_mergedAlloc, maxFrames); // the first time arrays area allocated in the `null` area, the second time in 746 // actually allocated memory (since we now have the needed length). 747 } 748 749 void layout(ref MergedAllocation ma, int maxFrames) 750 { 751 // allocate `maxFrames` elems, and return a slice in `_intermediateBuf`. 752 ma.allocArray(_intermediateBuf, maxFrames); 753 754 // allocate `maxFrames` elems, aligned to 16-byte boundaries. Return a pointer to that in `_coeffs`. 755 ma.alloc(_coeffs, maxFrames, 16); 756 } 757 758 private: 759 float[] _intermediateBuf; 760 double* _coeffs; 761 MergedAllocation _mergedAlloc; 762 } 763 764 MyDSPStruct s; 765 s.initialize(14); 766 s._coeffs[0..14] = 1.0f; 767 s._intermediateBuf[0..14] = 1.0f; 768 s.initialize(17); 769 s._coeffs[0..17] = 1.0f; 770 s._intermediateBuf[0..17] = 1.0f; 771 }