1 /** 2 Defines `Vec`, `reallocBuffer` and memory functions. 3 4 Copyright: Guillaume Piolat 2015-2016. 5 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 Authors: Guillaume Piolat 7 */ 8 module dplug.core.vec; 9 10 import std.traits: hasElaborateDestructor; 11 12 import core.stdc.stdlib: malloc, free, realloc; 13 import core.stdc.string: memcpy, memmove; 14 15 import core.exception; 16 import inteli.xmmintrin; 17 18 19 // This module deals with aligned memory. 20 // You'll also find here a non-copyable std::vector equivalent `Vec`. 21 22 /// Allocates an aligned memory chunk. 23 /// Functionally equivalent to Visual C++ _aligned_malloc. 24 /// Do not mix allocations with different alignment. 25 /// Important: `alignedMalloc(0)` does not necessarily return `null`, and its result 26 /// _has_ to be freed with `alignedFree`. 27 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc 28 { 29 assert(alignment != 0); 30 31 // Short-cut and use the C allocator to avoid overhead if no alignment 32 if (alignment == 1) 33 { 34 // C99: 35 // Implementation-defined behavior 36 // Whether the calloc, malloc, and realloc functions return a null pointer 37 // or a pointer to an allocated object when the size requested is zero. 38 // In any case, we'll have to free() it. 39 return malloc(size); 40 } 41 42 size_t request = requestedSize(size, alignment); 43 void* raw = malloc(request); 44 45 if (request > 0 && raw == null) // malloc(0) can validly return anything 46 onOutOfMemoryError(); 47 48 return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment); 49 } 50 51 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc. 52 /// Functionally equivalent to Visual C++ _aligned_free. 53 /// Do not mix allocations with different alignment. 54 void alignedFree(void* aligned, size_t alignment) nothrow @nogc 55 { 56 // Short-cut and use the C allocator to avoid overhead if no alignment 57 if (alignment == 1) 58 return free(aligned); 59 60 // support for free(NULL) 61 if (aligned is null) 62 return; 63 64 assert(alignment != 0); 65 assert(isPointerAligned(aligned, alignment)); 66 67 void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof); 68 free(*rawLocation); 69 } 70 71 /// Reallocates an aligned memory chunk allocated by `alignedMalloc` or `alignedRealloc`. 72 /// Functionally equivalent to Visual C++ `_aligned_realloc`. 73 /// Do not mix allocations with different alignment. 74 /// Important: `alignedRealloc(p, 0)` does not necessarily return `null`, and its result 75 /// _has_ to be freed with `alignedFree`. 76 void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow @nogc 77 { 78 return alignedReallocImpl!true(aligned, size, alignment); 79 } 80 81 82 /// Same as `alignedRealloc` but does not preserve data. 83 void* alignedReallocDiscard(void* aligned, size_t size, size_t alignment) nothrow @nogc 84 { 85 return alignedReallocImpl!false(aligned, size, alignment); 86 } 87 88 89 /// Returns: `true` if the pointer is suitably aligned. 90 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc 91 { 92 assert(alignment != 0); 93 return ( cast(size_t)p & (alignment - 1) ) == 0; 94 } 95 unittest 96 { 97 ubyte b; 98 align(16) ubyte[5] c; 99 assert(isPointerAligned(&b, 1)); 100 assert(!isPointerAligned(&c[1], 2)); 101 assert(isPointerAligned(&c[4], 4)); 102 } 103 104 /// Does memory slices a[0..a_size] and b[0..b_size] have an overlapping byte? 105 bool isMemoryOverlapping(const(void)* a, ptrdiff_t a_size, 106 const(void)* b, ptrdiff_t b_size) pure @trusted 107 { 108 assert(a_size >= 0 && b_size >= 0); 109 110 if (a is null || b is null) 111 return false; 112 113 if (a_size == 0 || b_size == 0) 114 return false; 115 116 ubyte* lA = cast(ubyte*)a; 117 ubyte* hA = lA + a_size; 118 ubyte* lB = cast(ubyte*)b; 119 ubyte* hB = lB + b_size; 120 121 // There is overlapping, if lA is inside lB..hB, or lB is inside lA..hA 122 123 if (lA >= lB && lA < hB) 124 return true; 125 126 if (lB >= lA && lB < hA) 127 return true; 128 129 return false; 130 } 131 bool isMemoryOverlapping(const(void)[] a, const(void)[] b) pure @trusted 132 { 133 return isMemoryOverlapping(a.ptr, a.length, b.ptr, b.length); 134 } 135 unittest 136 { 137 ubyte[100] a; 138 assert(!isMemoryOverlapping(null, a)); 139 assert(!isMemoryOverlapping(a, null)); 140 assert(!isMemoryOverlapping(a[1..1], a[0..10])); 141 assert(!isMemoryOverlapping(a[1..10], a[10..100])); 142 assert(!isMemoryOverlapping(a[30..100], a[0..30])); 143 assert(isMemoryOverlapping(a[1..50], a[49..100])); 144 assert(isMemoryOverlapping(a[49..100], a[1..50])); 145 assert(isMemoryOverlapping(a[40..45], a[30..55])); 146 assert(isMemoryOverlapping(a[30..55], a[40..45])); 147 } 148 149 private nothrow @nogc 150 { 151 void* alignedReallocImpl(bool PreserveDataIfResized)(void* aligned, size_t size, size_t alignment) 152 { 153 // Short-cut and use the C allocator to avoid overhead if no alignment 154 if (alignment == 1) 155 { 156 // C99: 157 // Implementation-defined behavior 158 // Whether the calloc, malloc, and realloc functions return a null pointer 159 // or a pointer to an allocated object when the size requested is zero. 160 // In any case, we'll have to `free()` it. 161 return realloc(aligned, size); 162 } 163 164 if (aligned is null) 165 return alignedMalloc(size, alignment); 166 167 assert(alignment != 0); 168 assert(isPointerAligned(aligned, alignment)); 169 170 size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2); 171 172 void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof); 173 size_t request = requestedSize(size, alignment); 174 size_t previousRequest = requestedSize(previousSize, alignment); 175 assert(previousRequest - request == previousSize - size); // same alignment 176 177 // Heuristic: if a requested size is within 50% to 100% of what is already allocated 178 // then exit with the same pointer 179 if ( (previousRequest < request * 4) && (request <= previousRequest) ) 180 return aligned; 181 182 void* newRaw = malloc(request); 183 static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014 184 { 185 if (request > 0 && newRaw == null) // realloc(0) can validly return anything 186 onOutOfMemoryError(); 187 } 188 189 void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, size, alignment); 190 191 static if (PreserveDataIfResized) 192 { 193 size_t minSize = size < previousSize ? size : previousSize; 194 memcpy(newAligned, aligned, minSize); // memcpy OK 195 } 196 197 // Free previous data 198 alignedFree(aligned, alignment); 199 assert(isPointerAligned(newAligned, alignment)); 200 return newAligned; 201 } 202 203 /// Returns: next pointer aligned with alignment bytes. 204 void* nextAlignedPointer(void* start, size_t alignment) pure 205 { 206 import dplug.core.math : nextMultipleOf; 207 return cast(void*)nextMultipleOf(cast(size_t)(start), alignment); 208 } 209 210 // Returns number of bytes to actually allocate when asking 211 // for a particular alignement 212 size_t requestedSize(size_t askedSize, size_t alignment) pure 213 { 214 enum size_t pointerSize = size_t.sizeof; 215 return askedSize + alignment - 1 + pointerSize * 2; 216 } 217 218 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it) 219 void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment) 220 { 221 enum size_t pointerSize = size_t.sizeof; 222 char* start = cast(char*)raw + pointerSize * 2; 223 void* aligned = nextAlignedPointer(start, alignment); 224 void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize); 225 *rawLocation = raw; 226 size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize); 227 *sizeLocation = size; 228 assert( isPointerAligned(aligned, alignment) ); 229 return aligned; 230 } 231 } 232 233 unittest 234 { 235 { 236 void* p = alignedMalloc(23, 16); 237 assert(p !is null); 238 assert(((cast(size_t)p) & 0xf) == 0); 239 240 alignedFree(p, 16); 241 } 242 243 void* nullAlloc = alignedMalloc(0, 16); 244 assert(nullAlloc != null); 245 nullAlloc = alignedRealloc(nullAlloc, 0, 16); 246 assert(nullAlloc != null); 247 alignedFree(nullAlloc, 16); 248 249 { 250 int alignment = 16; 251 int* p = null; 252 253 // check if growing keep values in place 254 foreach(int i; 0..100) 255 { 256 p = cast(int*) alignedRealloc(p, (i + 1) * int.sizeof, alignment); 257 p[i] = i; 258 } 259 260 foreach(int i; 0..100) 261 assert(p[i] == i); 262 263 p = cast(int*) alignedRealloc(p, 0, alignment); 264 assert(p !is null); 265 266 alignedFree(p, alignment); 267 } 268 269 // Verify that same size alloc preserve pointer. 270 { 271 void* p = null; 272 p = alignedRealloc(p, 254, 16); 273 void* p2 = alignedRealloc(p, 254, 16); 274 assert(p == p2); 275 276 // Test shrink heuristic 277 void* p3 = alignedRealloc(p, 128, 16); 278 assert(p == p3); 279 alignedFree(p3, 16); 280 } 281 } 282 283 284 285 /// Used throughout dplug:dsp to avoid reliance on GC. 286 /// Important: Size 0 is special-case to free the slice. 287 /// This works a bit like alignedRealloc except with slices as input. 288 /// You MUST use consistent alignement thoughout the lifetime of this buffer. 289 /// 290 /// Params: 291 /// buffer = Existing allocated buffer. Can be null. 292 /// Input slice length is not considered. 293 /// length = Desired slice length. 294 /// alignment = Alignement if the slice has allocation requirements, 1 else. 295 /// Must match for deallocation. 296 /// 297 /// Example: 298 /// --- 299 /// import std.stdio; 300 /// 301 /// struct MyDSP 302 /// { 303 /// nothrow @nogc: 304 /// 305 /// void initialize(int maxFrames) 306 /// { 307 /// // mybuf points to maxFrames frames 308 /// mybuf.reallocBuffer(maxFrames); 309 /// } 310 /// 311 /// ~this() 312 /// { 313 /// // If you don't free the buffer, it will leak. 314 /// mybuf.reallocBuffer(0); 315 /// } 316 /// 317 /// private: 318 /// float[] mybuf; 319 /// } 320 /// --- 321 void reallocBuffer(T)(ref T[] buffer, size_t length, int alignment = 1) nothrow @nogc 322 { 323 static if (is(T == struct) && hasElaborateDestructor!T) 324 { 325 static assert(false); // struct with destructors not supported 326 } 327 328 /// Size 0 is special-case to free the slice. 329 if (length == 0) 330 { 331 alignedFree(buffer.ptr, alignment); 332 buffer = null; 333 return; 334 } 335 336 T* pointer = cast(T*) alignedRealloc(buffer.ptr, T.sizeof * length, alignment); 337 if (pointer is null) 338 buffer = null; // alignment 1 can still return null 339 else 340 buffer = pointer[0..length]; 341 } 342 unittest 343 { 344 int[] arr; 345 arr.reallocBuffer(15); 346 assert(arr.length == 15); 347 arr.reallocBuffer(0); 348 assert(arr.length == 0); 349 } 350 351 352 /// Returns: A newly created `Vec`. 353 Vec!T makeVec(T)(size_t initialSize = 0, int alignment = 1) nothrow @nogc 354 { 355 return Vec!T(initialSize, alignment); 356 } 357 358 /// Kind of a std::vector replacement. 359 /// Grow-only array, points to a (optionally aligned) memory location. 360 /// This can also work as an output range. 361 /// `Vec` is designed to work even when uninitialized, without `makeVec`. 362 /// Warning: it is pretty barebones, doesn't respect T.init or call destructors. 363 /// When used in a GC program, GC roots won't be registered. 364 struct Vec(T) 365 { 366 nothrow: 367 @nogc: 368 369 static if (is(T == struct) && hasElaborateDestructor!T) 370 { 371 pragma(msg, "WARNING! struct with destructors were never meant to be supported in Vec!T. This will be removed in Dplug v15."); 372 } 373 374 public 375 { 376 /// Creates an aligned buffer with given initial size. 377 this(size_t initialSize, int alignment) @safe 378 { 379 assert(alignment != 0); 380 _size = 0; 381 _allocated = 0; 382 _data = null; 383 _alignment = alignment; 384 resizeExactly(initialSize); 385 } 386 387 ~this() @trusted 388 { 389 if (_data !is null) 390 { 391 alignedFree(_data, _alignment); 392 _data = null; 393 _allocated = 0; 394 } 395 } 396 397 @disable this(this); 398 399 /// Returns: Length of buffer in elements. 400 size_t length() pure const @safe 401 { 402 return _size; 403 } 404 ///ditto 405 alias size = length; 406 407 /// Returns: Length of buffer in elements, as signed. 408 ptrdiff_t slength() pure const @safe 409 { 410 return _size; 411 } 412 ///ditto 413 alias ssize = length; 414 415 /// Returns: `true` if length is zero. 416 bool isEmpty() pure const @safe 417 { 418 return _size == 0; 419 } 420 421 /// Returns: Allocated size of the underlying array. 422 size_t capacity() pure const @safe 423 { 424 return _allocated; 425 } 426 427 /// Returns: Length of buffer in elements. 428 alias opDollar = length; 429 430 /// Resizes a buffer to hold $(D askedSize) elements. 431 void resize(size_t askedSize) @trusted 432 { 433 resizeExactly(askedSize); 434 } 435 436 /// Pop last element 437 T popBack() @trusted 438 { 439 assert(_size > 0); 440 _size = _size - 1; 441 return _data[_size]; 442 } 443 444 /// Append an element to this buffer. 445 void pushBack(T x) @trusted 446 { 447 size_t i = _size; 448 resizeGrow(_size + 1); 449 _data[i] = x; 450 } 451 452 // DMD 2.088 deprecates the old D1-operators 453 static if (__VERSION__ >= 2088) 454 { 455 ///ditto 456 void opOpAssign(string op)(T x) @safe if (op == "~") 457 { 458 pushBack(x); 459 } 460 } 461 else 462 { 463 ///ditto 464 void opCatAssign(T x) @safe 465 { 466 pushBack(x); 467 } 468 } 469 470 // Output range support 471 alias put = pushBack; 472 473 /// Finds an item, returns -1 if not found 474 int indexOf(T x) @trusted 475 { 476 enum bool isStaticArray(T) = __traits(isStaticArray, T); 477 478 static if (isStaticArray!T) 479 { 480 // static array would be compared by identity as slice, which is not what we want. 481 foreach(int i; 0..cast(int)_size) 482 if (_data[i] == x) 483 return i; 484 } 485 else 486 { 487 // base types: identity is equality 488 // reference types: looking for identity 489 foreach(int i; 0..cast(int)_size) 490 if (_data[i] is x) 491 return i; 492 } 493 return -1; 494 } 495 496 /// Removes an item and replaces it by the last item. 497 /// Warning: this reorders the array. 498 void removeAndReplaceByLastElement(size_t index) @trusted 499 { 500 assert(index < _size); 501 _data[index] = _data[--_size]; 502 } 503 504 /// Removes an item and shift the rest of the array to front by 1. 505 /// Warning: O(N) complexity. 506 void removeAndShiftRestOfArray(size_t index) @trusted 507 { 508 assert(index < _size); 509 for (; index + 1 < _size; ++index) 510 _data[index] = _data[index+1]; 511 --_size; 512 } 513 514 /// Removes a range of items and shift the rest of the array to front. 515 /// Warning: O(N) complexity. 516 void removeAndShiftRestOfArray(size_t first, size_t last) @trusted 517 { 518 assert(first <= last && first <= _size && (last <= _size)); 519 size_t remain = _size - last; 520 for (size_t n = 0; n < remain; ++n) 521 _data[first + n] = _data[last + n]; 522 _size -= (last - first); 523 } 524 525 /// Appends another buffer to this buffer. 526 void pushBack(ref Vec other) @trusted 527 { 528 size_t oldSize = _size; 529 resizeGrow(_size + other._size); 530 memmove(_data + oldSize, other._data, T.sizeof * other._size); 531 } 532 533 /// Appends a slice to this buffer. 534 /// `slice` should not belong to the same buffer _data. 535 void pushBack(T[] slice) @trusted 536 { 537 size_t oldSize = _size; 538 size_t newSize = _size + slice.length; 539 resizeGrow(newSize); 540 for (size_t n = 0; n < slice.length; ++n) 541 _data[oldSize + n] = slice[n]; 542 } 543 544 /// Returns: Raw pointer to data. 545 @property inout(T)* ptr() inout @system 546 { 547 return _data; 548 } 549 550 /// Returns: n-th element. 551 ref inout(T) opIndex(size_t i) pure inout @trusted 552 { 553 return _data[i]; 554 } 555 556 T opIndexAssign(T x, size_t i) pure @trusted 557 { 558 return _data[i] = x; 559 } 560 561 /// Sets size to zero, but keeps allocated buffers. 562 void clearContents() pure @safe 563 { 564 _size = 0; 565 } 566 567 /// Returns: Whole content of the array in one slice. 568 inout(T)[] opSlice() inout @safe 569 { 570 return opSlice(0, length()); 571 } 572 573 /// Returns: A slice of the array. 574 inout(T)[] opSlice(size_t i1, size_t i2) inout @trusted 575 { 576 return _data[i1 .. i2]; 577 } 578 579 /// Fills the buffer with the same value. 580 void fill(T x) @trusted 581 { 582 _data[0.._size] = x; 583 } 584 585 /// Move. Give up owner ship of the data. 586 T[] releaseData() @system 587 { 588 T[] data = _data[0.._size]; 589 assert(_alignment == 1); // else would need to be freed with alignedFree. 590 this._data = null; 591 this._size = 0; 592 this._allocated = 0; 593 this._alignment = 0; 594 return data; 595 } 596 } 597 598 private 599 { 600 size_t _size = 0; 601 T* _data = null; 602 size_t _allocated = 0; 603 size_t _alignment = 1; // for an unaligned Vec, you probably are not interested in alignment 604 605 /// Used internally to grow in response to a pushBack operation. 606 /// Different heuristic, since we know that the resize is likely to be repeated for an 607 /// increasing size later. 608 void resizeGrow(size_t askedSize) @trusted 609 { 610 if (_allocated < askedSize) 611 { 612 613 version(all) 614 { 615 size_t newCap = computeNewCapacity(askedSize, _size); 616 setCapacity(newCap); 617 } 618 else 619 { 620 setCapacity(2 * askedSize); 621 } 622 } 623 _size = askedSize; 624 } 625 626 // Resizes the `Vector` to hold exactly `askedSize` elements. 627 // Still if the allocated capacity is larger, do nothing. 628 void resizeExactly(size_t askedSize) @trusted 629 { 630 if (_allocated < askedSize) 631 { 632 setCapacity(askedSize); 633 } 634 _size = askedSize; 635 } 636 637 // Internal use, realloc internal buffer, copy existing items. 638 // Doesn't initialize the new ones. 639 void setCapacity(size_t cap) 640 { 641 size_t numBytes = cap * T.sizeof; 642 _data = cast(T*)(alignedRealloc(_data, numBytes, _alignment)); 643 _allocated = cap; 644 } 645 646 // Compute new capacity, while growing. 647 size_t computeNewCapacity(size_t newLength, size_t oldLength) 648 { 649 // Optimal value (Windows malloc) not far from there. 650 enum size_t PAGESIZE = 4096; 651 652 size_t newLengthBytes = newLength * T.sizeof; 653 if (newLengthBytes > PAGESIZE) 654 { 655 // Larger arrays need a smaller growth factor to avoid wasting too much bytes. 656 // This was found when tracing curve, not too far from golden ratio for some reason. 657 return newLength + newLength / 2 + newLength / 8; 658 } 659 else 660 { 661 // For smaller arrays being pushBack, can bring welcome speed by minimizing realloc. 662 return newLength * 3; 663 } 664 } 665 } 666 } 667 668 unittest 669 { 670 import std.range.primitives; 671 static assert(isOutputRange!(Vec!ubyte, ubyte)); 672 673 674 import std.random; 675 int NBUF = 200; 676 677 Xorshift32 rng; 678 rng.seed(0xBAADC0DE); 679 680 struct box2i { int a, b, c, d; } 681 Vec!box2i[] boxes; 682 boxes.length = NBUF; 683 684 foreach(i; 0..NBUF) 685 { 686 boxes[i] = makeVec!box2i(); 687 } 688 689 foreach(j; 0..200) 690 { 691 foreach(i; 0..NBUF) 692 { 693 int previousSize = cast(int)(boxes[i].length); 694 void* previousPtr = boxes[i].ptr; 695 foreach(int k; 0..cast(int)(boxes[i].length)) 696 boxes[i][k] = box2i(k, k, k, k); 697 698 int newSize = uniform(0, 100, rng); 699 boxes[i].resize(newSize); 700 701 int minSize = previousSize; 702 if (minSize > boxes[i].length) 703 minSize = cast(int)(boxes[i].length); 704 705 void* newPtr = boxes[i].ptr; 706 foreach(int k; 0..minSize) 707 { 708 box2i item = boxes[i][k]; 709 box2i shouldBe = box2i(k, k, k, k); 710 assert(item == shouldBe); 711 } 712 713 int sum = 0; 714 foreach(k; 0..newSize) 715 { 716 box2i bb = boxes[i][k]; 717 sum += bb.a + bb.b + bb.c + bb.d; 718 } 719 } 720 } 721 722 foreach(i; 0..NBUF) 723 boxes[i].destroy(); 724 725 { 726 auto buf = makeVec!int; 727 enum N = 10; 728 buf.resize(N); 729 foreach(i ; 0..N) 730 buf[i] = i; 731 732 foreach(i ; 0..N) 733 assert(buf[i] == i); 734 735 auto buf2 = makeVec!int; 736 buf2.pushBack(11); 737 buf2.pushBack(14); 738 739 // test pushBack(slice) 740 buf.clearContents(); 741 buf.pushBack(buf2[]); 742 assert(buf[0] == 11); 743 assert(buf[1] == 14); 744 745 // test pushBack(slice) 746 buf2[1] = 8; 747 buf.clearContents(); 748 buf.pushBack(buf2); 749 assert(buf[0] == 11); 750 assert(buf[1] == 8); 751 } 752 } 753 754 // Vec should work without any initialization 755 unittest 756 { 757 Vec!string vec; 758 759 foreach(e; vec[]) 760 { 761 } 762 763 assert(vec.length == 0); 764 assert(vec.size == 0); 765 assert(vec.slength == 0); 766 assert(vec.ssize == 0); 767 vec.clearContents(); 768 vec.resize(0); 769 assert(vec == vec.init); 770 vec.fill("filler"); 771 assert(vec.ptr is null); 772 } 773 774 // Issue #312: vec.opIndex not returning ref which break struct assignment 775 unittest 776 { 777 static struct A 778 { 779 int x; 780 } 781 Vec!A vec = makeVec!A(); 782 A a; 783 vec.pushBack(a); 784 vec ~= a; 785 vec[0].x = 42; // vec[0] needs to return a ref 786 assert(vec[0].x == 42); 787 } 788 unittest // removeAndShiftRestOfArray was wrong 789 { 790 Vec!int v; 791 v.pushBack(14); 792 v.pushBack(27); 793 v.pushBack(38); 794 assert(v.length == 3); 795 v.removeAndShiftRestOfArray(1); 796 assert(v.length == 2); 797 assert(v[] == [14, 38]); 798 } 799 unittest // removeAndShiftRestOfArray with slice 800 { 801 Vec!int v; 802 v.pushBack([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); 803 v.removeAndShiftRestOfArray(3, 5); 804 assert(v[] == [0, 1, 2, 5, 6, 7, 8, 9]); 805 v.removeAndShiftRestOfArray(0, 4); 806 assert(v[] == [6, 7, 8, 9]); 807 v.removeAndShiftRestOfArray(2, 4); 808 assert(v[] == [6, 7]); 809 v.removeAndShiftRestOfArray(2, 2); 810 v.removeAndShiftRestOfArray(0, 0); 811 v.removeAndShiftRestOfArray(1, 1); 812 assert(v[] == [6, 7]); 813 } 814 815 816 817 /// Allows to merge the allocation of several arrays, which saves allocation count and can speed up things thanks to locality. 818 /// 819 /// Example: see below unittest. 820 struct MergedAllocation 821 { 822 nothrow: 823 @nogc: 824 825 // In debug mode, add stomp detection to `MergedAllocation`. 826 // This takes additional complexity to coarse check for stomping nearby buffers. 827 debug 828 { 829 private enum mergedAllocStompWarning = true; 830 } 831 else 832 { 833 private enum mergedAllocStompWarning = false; 834 } 835 836 837 // This adds 32-byte of sentinels around each allocation, 838 // and check at the end of the program that they were unaffected. 839 static if (mergedAllocStompWarning) 840 { 841 // Number of bytes to write between areas to check for stomping. 842 enum SENTINEL_BYTES = 32; 843 } 844 845 enum maxExpectedAlignment = 32; 846 847 /// Start defining the area of allocations. 848 void start() 849 { 850 // Detect memory errors in former uses. 851 static if (mergedAllocStompWarning) 852 { 853 checkSentinelAreas(); 854 _allocateWasCalled = false; 855 } 856 857 _base = cast(ubyte*)(cast(size_t)0); 858 } 859 860 /// Allocate (or count) space needed for `numElems` elements of type `T` with given alignment. 861 /// This gets called twice for each array, see example for usage. 862 /// 863 /// This bumps the internal bump allocator. 864 /// Giving null to this chain and converting the result to size_t give the total needed size 865 /// for the merged allocation. 866 /// 867 /// Warning: 868 /// - If called after a `start()` call, the area returned are wrong and are only for 869 /// counting needed bytes. Don't use those. 870 /// 871 /// - If called after an `allocate()` call, the area returned are an actual merged 872 /// allocation (if the same calls are done). 873 /// 874 /// Warning: Prefer `allocArray` over `alloc` variant, since the extra length field WILL help 875 /// you catch memory errors before release. Else it is very common to miss buffer 876 /// overflows in samplerate changes. 877 void allocArray(T)(out T[] array, size_t numElems, size_t alignment = 1) 878 { 879 assert(alignment <= maxExpectedAlignment); 880 assert( (alignment != 0) && ((alignment & (alignment - 1)) == 0)); // power of two 881 882 size_t adr = cast(size_t) _base; 883 884 // 1. Align base address 885 size_t mask = ~(alignment - 1); 886 adr = (adr + alignment - 1) & mask; 887 888 // 2. Assign array and base. 889 array = (cast(T*)adr)[0..numElems]; 890 adr += T.sizeof * numElems; 891 _base = cast(ubyte*) adr; 892 893 static if (mergedAllocStompWarning) 894 { 895 if (_allocateWasCalled && _allocation !is null) 896 { 897 // Each allocated area followed with SENTINEL_BYTES bytes of value 0xCC 898 _base[0..SENTINEL_BYTES] = 0xCC; 899 registerSentinel(_base); 900 } 901 _base += SENTINEL_BYTES; 902 } 903 } 904 905 ///ditto 906 void alloc(T)(out T* array, size_t numElems, size_t alignment = 1) 907 { 908 T[] arr; 909 allocArray(arr, numElems, alignment); 910 array = arr.ptr; 911 } 912 913 /// Allocate actual storage for the merged allocation. From there, you need to define exactly the same area with `alloc` and `allocArray`. 914 /// This time they will get a proper value. 915 void allocate() 916 { 917 static if (mergedAllocStompWarning) _allocateWasCalled = true; 918 919 size_t sizeNeeded = cast(size_t)_base; // since it was fed 0 at start. 920 921 if (sizeNeeded == 0) 922 { 923 // If no bytes are requested, it means no buffer were requested, or only with zero size. 924 // We will return a null pointer in that case, since accessing them would be illegal anyway. 925 _allocation = null; 926 } 927 else 928 { 929 // the merged allocation needs to have the largest expected alignment, else the size could depend on the hazards 930 // of the allocation. With maximum alignment, padding is the same so long as areas have smaller or equal alignment requirements. 931 _allocation = cast(ubyte*) _mm_realloc(_allocation, sizeNeeded, maxExpectedAlignment); 932 } 933 934 // So that the next layout call points to the right area. 935 _base = _allocation; 936 } 937 938 ~this() 939 { 940 static if (mergedAllocStompWarning) 941 { 942 checkSentinelAreas(); 943 } 944 945 if (_allocation != null) 946 { 947 _mm_free(_allocation); 948 _allocation = null; 949 } 950 } 951 952 private: 953 954 // Location of the allocation. 955 ubyte* _allocation = null; 956 957 /// 958 ubyte* _base = null; 959 960 static if (mergedAllocStompWarning) 961 { 962 bool _allocateWasCalled = false; 963 964 Vec!(ubyte*) _sentinels; // start of sentinel area (SENTINAL_BYTES long) 965 966 void registerSentinel(ubyte* start) 967 { 968 _sentinels.pushBack(start); 969 } 970 971 bool hasMemoryError() // true if sentinel bytes stomped 972 { 973 assert(_allocateWasCalled && _allocation !is null); 974 975 foreach(ubyte* s; _sentinels[]) 976 { 977 for (int n = 0; n < 32; ++n) 978 { 979 if (s[n] != 0xCC) 980 return true; 981 } 982 } 983 return false; 984 } 985 986 // Check existing sentinels, and unregister them 987 void checkSentinelAreas() 988 { 989 if (!_allocateWasCalled) 990 return; // still haven't done an allocation, nothing to check. 991 992 if (_allocation is null) 993 return; // nothing to check 994 995 // If you fail here, there is a memory error in your access patterns. 996 // Sentinel bytes of value 0xCC were overwritten in a `MergedAllocation`. 997 // You can use slices with `allocArray` instead of `alloc` to find the faulty 998 // access. This check doesn't catch everything! 999 assert(! hasMemoryError()); 1000 1001 _sentinels.clearContents(); 1002 } 1003 } 1004 } 1005 1006 1007 // Here is how you should use MergedAllocation. 1008 unittest 1009 { 1010 static struct MyDSPStruct 1011 { 1012 public: 1013 nothrow: 1014 @nogc: 1015 void initialize(int maxFrames) 1016 { 1017 _mergedAlloc.start(); 1018 layout(_mergedAlloc, maxFrames); // you need such a layout function to be called twice. 1019 _mergedAlloc.allocate(); 1020 layout(_mergedAlloc, maxFrames); // the first time arrays area allocated in the `null` area, the second time in 1021 // actually allocated memory (since we now have the needed length). 1022 } 1023 1024 void layout(ref MergedAllocation ma, int maxFrames) 1025 { 1026 // allocate `maxFrames` elems, and return a slice in `_intermediateBuf`. 1027 ma.allocArray(_intermediateBuf, maxFrames); 1028 1029 // allocate `maxFrames` elems, aligned to 16-byte boundaries. Return a pointer to that in `_coeffs`. 1030 ma.alloc(_coeffs, maxFrames, 16); 1031 } 1032 1033 private: 1034 float[] _intermediateBuf; 1035 double* _coeffs; 1036 MergedAllocation _mergedAlloc; 1037 } 1038 1039 MyDSPStruct s; 1040 s.initialize(14); 1041 s._coeffs[0..14] = 1.0f; 1042 s._intermediateBuf[0..14] = 1.0f; 1043 s.initialize(17); 1044 s._coeffs[0..17] = 1.0f; 1045 s._intermediateBuf[0..17] = 1.0f; 1046 } 1047 1048 // Should be valid to allocate nothing with a MergedAllocation. 1049 unittest 1050 { 1051 MergedAllocation ma; 1052 ma.start(); 1053 ma.allocate(); 1054 assert(ma._allocation == null); 1055 } 1056 1057 // Should be valid to allocate nothing with a MergedAllocation. 1058 unittest 1059 { 1060 MergedAllocation ma; 1061 ma.start(); 1062 ubyte[] arr, arr2; 1063 ma.allocArray(arr, 17); 1064 ma.allocArray(arr2, 24); 1065 assert(ma._allocation == null); 1066 1067 ma.allocate(); 1068 ma.allocArray(arr, 17); 1069 ma.allocArray(arr2, 24); 1070 assert(ma._allocation != null); 1071 assert(arr.length == 17); 1072 assert(arr2.length == 24); 1073 1074 // Test memory error detection with a simple case. 1075 static if (MergedAllocation.mergedAllocStompWarning) 1076 { 1077 assert(!ma.hasMemoryError()); 1078 1079 // Create a memory error 1080 arr.ptr[18] = 2; 1081 assert(ma.hasMemoryError()); 1082 arr.ptr[18] = 0xCC; // undo the error to avoid stopping the unittests 1083 } 1084 }