1 /** 2 Defines `Vec`, `reallocBuffer` and memory functions. 3 4 Copyright: Guillaume Piolat 2015-2024. 5 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 Authors: Guillaume Piolat 7 */ 8 module dplug.core.vec; 9 10 import std.traits: hasElaborateDestructor; 11 12 import core.stdc.stdlib: malloc, free, realloc; 13 import core.stdc.string: memcpy, memmove; 14 15 import core.exception; 16 import inteli.xmmintrin; 17 18 19 // This module deals with aligned memory. 20 // You'll also find here a non-copyable std::vector equivalent `Vec`. 21 22 /// Allocates an aligned memory chunk. 23 /// Functionally equivalent to Visual C++ _aligned_malloc. 24 /// Do not mix allocations with different alignment. 25 /// Important: `alignedMalloc(0)` does not necessarily return `null`, and its result 26 /// _has_ to be freed with `alignedFree`. 27 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc 28 { 29 assert(alignment != 0); 30 31 // Short-cut and use the C allocator to avoid overhead if no alignment 32 if (alignment == 1) 33 { 34 // C99: 35 // Implementation-defined behavior 36 // Whether the calloc, malloc, and realloc functions return a null pointer 37 // or a pointer to an allocated object when the size requested is zero. 38 // In any case, we'll have to free() it. 39 return malloc(size); 40 } 41 42 size_t request = requestedSize(size, alignment); 43 void* raw = malloc(request); 44 45 if (request > 0 && raw == null) // malloc(0) can validly return anything 46 onOutOfMemoryError(); 47 48 return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment); 49 } 50 51 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc. 52 /// Functionally equivalent to Visual C++ _aligned_free. 53 /// Do not mix allocations with different alignment. 
void alignedFree(void* aligned, size_t alignment) nothrow @nogc
{
    // Short-cut and use the C allocator to avoid overhead if no alignment
    if (alignment == 1)
        return free(aligned);

    // support for free(NULL)
    if (aligned is null)
        return;

    assert(alignment != 0);
    assert(isPointerAligned(aligned, alignment));

    // Recover the original malloc'ed pointer, stored one word before the
    // aligned address by `storeRawPointerPlusSizeAndReturnAligned`.
    void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof);
    free(*rawLocation);
}

/// Reallocates an aligned memory chunk allocated by `alignedMalloc` or `alignedRealloc`.
/// Functionally equivalent to Visual C++ `_aligned_realloc`.
/// Do not mix allocations with different alignment.
/// Important: `alignedRealloc(p, 0)` does not necessarily return `null`, and its result
///            _has_ to be freed with `alignedFree`.
void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow @nogc
{
    return alignedReallocImpl!true(aligned, size, alignment);
}


/// Same as `alignedRealloc` but does not preserve data.
void* alignedReallocDiscard(void* aligned, size_t size, size_t alignment) nothrow @nogc
{
    return alignedReallocImpl!false(aligned, size, alignment);
}


/// Returns: `true` if the pointer is suitably aligned.
/// `alignment` must be a power of two for the mask trick below to be valid.
bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc
{
    assert(alignment != 0);
    return ( cast(size_t)p & (alignment - 1) ) == 0;
}
unittest
{
    ubyte b;
    align(16) ubyte[5] c;
    assert(isPointerAligned(&b, 1));
    assert(!isPointerAligned(&c[1], 2));
    assert(isPointerAligned(&c[4], 4));
}

/// Does memory slices a[0..a_size] and b[0..b_size] have an overlapping byte?
bool isMemoryOverlapping(const(void)* a, ptrdiff_t a_size,
                         const(void)* b, ptrdiff_t b_size) pure @trusted
{
    assert(a_size >= 0 && b_size >= 0);

    // A null or empty range overlaps nothing, by convention.
    if (a is null || b is null)
        return false;

    if (a_size == 0 || b_size == 0)
        return false;

    ubyte* lA = cast(ubyte*)a;
    ubyte* hA = lA + a_size;
    ubyte* lB = cast(ubyte*)b;
    ubyte* hB = lB + b_size;

    // There is overlapping, if lA is inside lB..hB, or lB is inside lA..hA

    if (lA >= lB && lA < hB)
        return true;

    if (lB >= lA && lB < hA)
        return true;

    return false;
}
///ditto, slice-based convenience overload.
bool isMemoryOverlapping(const(void)[] a, const(void)[] b) pure @trusted
{
    return isMemoryOverlapping(a.ptr, a.length, b.ptr, b.length);
}
unittest
{
    ubyte[100] a;
    assert(!isMemoryOverlapping(null, a));
    assert(!isMemoryOverlapping(a, null));
    assert(!isMemoryOverlapping(a[1..1], a[0..10]));
    assert(!isMemoryOverlapping(a[1..10], a[10..100]));
    assert(!isMemoryOverlapping(a[30..100], a[0..30]));
    assert(isMemoryOverlapping(a[1..50], a[49..100]));
    assert(isMemoryOverlapping(a[49..100], a[1..50]));
    assert(isMemoryOverlapping(a[40..45], a[30..55]));
    assert(isMemoryOverlapping(a[30..55], a[40..45]));
}

private nothrow @nogc
{
    // Shared implementation of `alignedRealloc` / `alignedReallocDiscard`.
    // `PreserveDataIfResized` selects whether old contents are copied over.
    void* alignedReallocImpl(bool PreserveDataIfResized)(void* aligned, size_t size, size_t alignment)
    {
        // Short-cut and use the C allocator to avoid overhead if no alignment
        if (alignment == 1)
        {
            // C99:
            // Implementation-defined behavior
            // Whether the calloc, malloc, and realloc functions return a null pointer
            // or a pointer to an allocated object when the size requested is zero.
            // In any case, we'll have to `free()` it.
            return realloc(aligned, size);
        }

        // Realloc of null behaves like a fresh allocation.
        if (aligned is null)
            return alignedMalloc(size, alignment);

        assert(alignment != 0);
        assert(isPointerAligned(aligned, alignment));

        // Read back the hidden header words written by
        // `storeRawPointerPlusSizeAndReturnAligned`: previously requested size,
        // then the original raw pointer.
        size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);

        void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
        size_t request = requestedSize(size, alignment);
        size_t previousRequest = requestedSize(previousSize, alignment);
        assert(previousRequest - request == previousSize - size); // same alignment

        // Heuristic: if the requested size is between 25% and 100% of what is
        // already allocated (previousRequest/4 < request <= previousRequest),
        // keep the same pointer to avoid shrink-then-regrow churn.
        if ( (previousRequest < request * 4) && (request <= previousRequest) )
            return aligned;

        void* newRaw = malloc(request);
        static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014
        {
            if (request > 0 && newRaw == null) // realloc(0) can validly return anything
                onOutOfMemoryError();
        }

        void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, size, alignment);

        static if (PreserveDataIfResized)
        {
            size_t minSize = size < previousSize ? size : previousSize;
            memcpy(newAligned, aligned, minSize); // memcpy OK: newAligned is a fresh, non-overlapping block
        }

        // Free previous data
        alignedFree(aligned, alignment);
        assert(isPointerAligned(newAligned, alignment));
        return newAligned;
    }

    /// Returns: next pointer aligned with alignment bytes.
    void* nextAlignedPointer(void* start, size_t alignment) pure
    {
        import dplug.core.math : nextMultipleOf;
        return cast(void*)nextMultipleOf(cast(size_t)(start), alignment);
    }

    // Returns number of bytes to actually allocate when asking
    // for a particular alignment: asked size, plus worst-case alignment
    // slack, plus room for the two hidden header words (raw pointer + size).
    size_t requestedSize(size_t askedSize, size_t alignment) pure
    {
        enum size_t pointerSize = size_t.sizeof;
        return askedSize + alignment - 1 + pointerSize * 2;
    }

    // Store pointer given by malloc, and size in bytes initially requested (alignedRealloc needs it).
    // Memory layout, relative to the returned `aligned` pointer:
    //   aligned - 2*pointerSize : requested size
    //   aligned - 1*pointerSize : raw pointer from malloc
    //   aligned                 : start of user data
    void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment)
    {
        enum size_t pointerSize = size_t.sizeof;
        char* start = cast(char*)raw + pointerSize * 2;
        void* aligned = nextAlignedPointer(start, alignment);
        void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
        *rawLocation = raw;
        size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
        *sizeLocation = size;
        assert( isPointerAligned(aligned, alignment) );
        return aligned;
    }
}

unittest
{
    {
        void* p = alignedMalloc(23, 16);
        assert(p !is null);
        assert(((cast(size_t)p) & 0xf) == 0);

        alignedFree(p, 16);
    }

    void* nullAlloc = alignedMalloc(0, 16);
    assert(nullAlloc != null);
    nullAlloc = alignedRealloc(nullAlloc, 0, 16);
    assert(nullAlloc != null);
    alignedFree(nullAlloc, 16);

    {
        int alignment = 16;
        int* p = null;

        // check if growing keep values in place
        foreach(int i; 0..100)
        {
            p = cast(int*) alignedRealloc(p, (i + 1) * int.sizeof, alignment);
            p[i] = i;
        }

        foreach(int i; 0..100)
            assert(p[i] == i);

        p = cast(int*) alignedRealloc(p, 0, alignment);
        assert(p !is null);

        alignedFree(p, alignment);
    }

    // Verify that same size alloc preserve pointer.
    {
        void* p = null;
        p = alignedRealloc(p, 254, 16);
        void* p2 = alignedRealloc(p, 254, 16);
        assert(p == p2);

        // Test shrink heuristic
        void* p3 = alignedRealloc(p, 128, 16);
        assert(p == p3);
        alignedFree(p3, 16);
    }
}



/// Used throughout dplug:dsp to avoid reliance on GC.
/// Important: Size 0 is special-case to free the slice.
/// This works a bit like alignedRealloc except with slices as input.
/// You MUST use consistent alignment throughout the lifetime of this buffer.
///
/// Params:
///    buffer = Existing allocated buffer. Can be null.
///             Input slice length is not considered.
///    length = Desired slice length.
///    alignment = Alignment if the slice has allocation requirements, 1 else.
///                Must match for deallocation.
///
/// Example:
/// ---
/// import std.stdio;
///
/// struct MyDSP
/// {
/// nothrow @nogc:
///
///     void initialize(int maxFrames)
///     {
///         // mybuf points to maxFrames frames
///         mybuf.reallocBuffer(maxFrames);
///     }
///
///     ~this()
///     {
///         // If you don't free the buffer, it will leak.
///         mybuf.reallocBuffer(0);
///     }
///
/// private:
///     float[] mybuf;
/// }
/// ---
void reallocBuffer(T)(ref T[] buffer, size_t length, int alignment = 1) nothrow @nogc
{
    static if (is(T == struct) && hasElaborateDestructor!T)
    {
        static assert(false); // struct with destructors not supported
    }

    // Size 0 is special-case to free the slice.
329 if (length == 0) 330 { 331 alignedFree(buffer.ptr, alignment); 332 buffer = null; 333 return; 334 } 335 336 T* pointer = cast(T*) alignedRealloc(buffer.ptr, T.sizeof * length, alignment); 337 if (pointer is null) 338 buffer = null; // alignment 1 can still return null 339 else 340 buffer = pointer[0..length]; 341 } 342 unittest 343 { 344 int[] arr; 345 arr.reallocBuffer(15); 346 assert(arr.length == 15); 347 arr.reallocBuffer(0); 348 assert(arr.length == 0); 349 } 350 351 352 /// Returns: A newly created `Vec`. 353 Vec!T makeVec(T)(size_t initialSize = 0) nothrow @nogc 354 { 355 return Vec!T(initialSize); 356 } 357 358 deprecated("Vec!T must have alignment 1. This constructor will be removed in Dplug v16") 359 Vec!T makeVec(T)(size_t initialSize = 0, int alignment) nothrow @nogc 360 { 361 return Vec!T(initialSize, alignment); 362 } 363 364 /// Kind of a std::vector replacement. 365 /// Grow-only array, points to a (optionally aligned) memory location. 366 /// This can also work as an output range. 367 /// `Vec` is designed to work even when uninitialized, without `makeVec`. 368 /// Warning: it is pretty barebones, doesn't respect T.init or call destructors. 369 /// When used in a GC program, GC roots won't be registered. 370 struct Vec(T) 371 { 372 nothrow: 373 @nogc: 374 375 static if (is(T == struct) && hasElaborateDestructor!T) 376 { 377 static assert(false, "struct with destructors cannot go in `Vec!T`. You are welcome to use the `numem` package instead or do otherwise."); 378 } 379 380 public 381 { 382 /// Creates an aligned buffer with given initial size. 383 this(size_t initialSize) @safe 384 { 385 _size = 0; 386 _allocated = 0; 387 _data = null; 388 _alignment = 1; 389 resizeExactly(initialSize); 390 } 391 392 deprecated("Vec!T must have alignment 1. 
This constructor will be removed in Dplug v16") 393 this(size_t initialSize, int alignment) @safe 394 { 395 assert(alignment != 0); 396 _size = 0; 397 _allocated = 0; 398 _data = null; 399 _alignment = alignment; 400 resizeExactly(initialSize); 401 } 402 403 ~this() @trusted 404 { 405 if (_data !is null) 406 { 407 alignedFree(_data, _alignment); 408 _data = null; 409 _allocated = 0; 410 } 411 } 412 413 @disable this(this); 414 415 /// Returns: Length of buffer in elements. 416 size_t length() pure const @safe 417 { 418 return _size; 419 } 420 ///ditto 421 alias size = length; 422 423 /// Returns: Length of buffer in elements, as signed. 424 ptrdiff_t slength() pure const @safe 425 { 426 return _size; 427 } 428 ///ditto 429 alias ssize = length; 430 431 /// Returns: `true` if length is zero. 432 bool isEmpty() pure const @safe 433 { 434 return _size == 0; 435 } 436 437 /// Returns: Allocated size of the underlying array. 438 size_t capacity() pure const @safe 439 { 440 return _allocated; 441 } 442 443 /// Returns: Length of buffer in elements. 444 alias opDollar = length; 445 446 /// Resizes a buffer to hold $(D askedSize) elements. 447 void resize(size_t askedSize) @trusted 448 { 449 resizeExactly(askedSize); 450 } 451 452 /// Pop last element 453 T popBack() @trusted 454 { 455 assert(_size > 0); 456 _size = _size - 1; 457 return _data[_size]; 458 } 459 460 /// Append an element to this buffer. 
461 void pushBack(T x) @trusted 462 { 463 size_t i = _size; 464 resizeGrow(_size + 1); 465 _data[i] = x; 466 } 467 468 // DMD 2.088 deprecates the old D1-operators 469 static if (__VERSION__ >= 2088) 470 { 471 ///ditto 472 void opOpAssign(string op)(T x) @safe if (op == "~") 473 { 474 pushBack(x); 475 } 476 } 477 else 478 { 479 ///ditto 480 void opCatAssign(T x) @safe 481 { 482 pushBack(x); 483 } 484 } 485 486 // Output range support 487 alias put = pushBack; 488 489 /// Finds an item, returns -1 if not found 490 int indexOf(T x) @trusted 491 { 492 enum bool isStaticArray(T) = __traits(isStaticArray, T); 493 494 static if (isStaticArray!T) 495 { 496 // static array would be compared by identity as slice, which is not what we want. 497 foreach(int i; 0..cast(int)_size) 498 if (_data[i] == x) 499 return i; 500 } 501 else 502 { 503 // base types: identity is equality 504 // reference types: looking for identity 505 foreach(int i; 0..cast(int)_size) 506 if (_data[i] is x) 507 return i; 508 } 509 return -1; 510 } 511 512 /// Removes an item and replaces it by the last item. 513 /// Warning: this reorders the array. 514 void removeAndReplaceByLastElement(size_t index) @trusted 515 { 516 assert(index < _size); 517 _data[index] = _data[--_size]; 518 } 519 520 /// Removes an item and shift the rest of the array to front by 1. 521 /// Warning: O(N) complexity. 522 void removeAndShiftRestOfArray(size_t index) @trusted 523 { 524 assert(index < _size); 525 for (; index + 1 < _size; ++index) 526 _data[index] = _data[index+1]; 527 --_size; 528 } 529 530 /// Removes a range of items and shift the rest of the array to front. 531 /// Warning: O(N) complexity. 
532 void removeAndShiftRestOfArray(size_t first, size_t last) @trusted 533 { 534 assert(first <= last && first <= _size && (last <= _size)); 535 size_t remain = _size - last; 536 for (size_t n = 0; n < remain; ++n) 537 _data[first + n] = _data[last + n]; 538 _size -= (last - first); 539 } 540 541 /// Appends another buffer to this buffer. 542 void pushBack(ref Vec other) @trusted 543 { 544 size_t oldSize = _size; 545 resizeGrow(_size + other._size); 546 memmove(_data + oldSize, other._data, T.sizeof * other._size); 547 } 548 549 /// Appends a slice to this buffer. 550 /// `slice` should not belong to the same buffer _data. 551 void pushBack(T[] slice) @trusted 552 { 553 size_t oldSize = _size; 554 size_t newSize = _size + slice.length; 555 resizeGrow(newSize); 556 for (size_t n = 0; n < slice.length; ++n) 557 _data[oldSize + n] = slice[n]; 558 } 559 560 /// Returns: Raw pointer to data. 561 @property inout(T)* ptr() inout @system 562 { 563 return _data; 564 } 565 566 /// Returns: n-th element. 567 ref inout(T) opIndex(size_t i) pure inout @trusted 568 { 569 return _data[i]; 570 } 571 572 T opIndexAssign(T x, size_t i) pure @trusted 573 { 574 return _data[i] = x; 575 } 576 577 /// Sets size to zero, but keeps allocated buffers. 578 void clearContents() pure @safe 579 { 580 _size = 0; 581 } 582 583 /// Returns: Whole content of the array in one slice. 584 inout(T)[] opSlice() inout @safe 585 { 586 return opSlice(0, length()); 587 } 588 589 /// Returns: A slice of the array. 590 inout(T)[] opSlice(size_t i1, size_t i2) inout @trusted 591 { 592 return _data[i1 .. i2]; 593 } 594 595 /// Fills the buffer with the same value. 596 void fill(T x) @trusted 597 { 598 _data[0.._size] = x; 599 } 600 601 /// Move. Give up owner ship of the data. 602 T[] releaseData() @system 603 { 604 T[] data = _data[0.._size]; 605 assert(_alignment == 1); // else would need to be freed with alignedFree. 
606 this._data = null; 607 this._size = 0; 608 this._allocated = 0; 609 this._alignment = 0; 610 return data; 611 } 612 } 613 614 private 615 { 616 size_t _size = 0; 617 T* _data = null; 618 size_t _allocated = 0; 619 size_t _alignment = 1; // for an unaligned Vec, you probably are not interested in alignment 620 621 /// Used internally to grow in response to a pushBack operation. 622 /// Different heuristic, since we know that the resize is likely to be repeated for an 623 /// increasing size later. 624 void resizeGrow(size_t askedSize) @trusted 625 { 626 if (_allocated < askedSize) 627 { 628 629 version(all) 630 { 631 size_t newCap = computeNewCapacity(askedSize, _size); 632 setCapacity(newCap); 633 } 634 else 635 { 636 setCapacity(2 * askedSize); 637 } 638 } 639 _size = askedSize; 640 } 641 642 // Resizes the `Vector` to hold exactly `askedSize` elements. 643 // Still if the allocated capacity is larger, do nothing. 644 void resizeExactly(size_t askedSize) @trusted 645 { 646 if (_allocated < askedSize) 647 { 648 setCapacity(askedSize); 649 } 650 _size = askedSize; 651 } 652 653 // Internal use, realloc internal buffer, copy existing items. 654 // Doesn't initialize the new ones. 655 void setCapacity(size_t cap) 656 { 657 size_t numBytes = cap * T.sizeof; 658 _data = cast(T*)(alignedRealloc(_data, numBytes, _alignment)); 659 _allocated = cap; 660 } 661 662 // Compute new capacity, while growing. 663 size_t computeNewCapacity(size_t newLength, size_t oldLength) 664 { 665 // Optimal value (Windows malloc) not far from there. 666 enum size_t PAGESIZE = 4096; 667 668 size_t newLengthBytes = newLength * T.sizeof; 669 if (newLengthBytes > PAGESIZE) 670 { 671 // Larger arrays need a smaller growth factor to avoid wasting too much bytes. 672 // This was found when tracing curve, not too far from golden ratio for some reason. 
673 return newLength + newLength / 2 + newLength / 8; 674 } 675 else 676 { 677 // For smaller arrays being pushBack, can bring welcome speed by minimizing realloc. 678 return newLength * 3; 679 } 680 } 681 } 682 } 683 684 unittest 685 { 686 import std.range.primitives; 687 static assert(isOutputRange!(Vec!ubyte, ubyte)); 688 689 690 import std.random; 691 int NBUF = 200; 692 693 Xorshift32 rng; 694 rng.seed(0xBAADC0DE); 695 696 struct box2i { int a, b, c, d; } 697 Vec!box2i[] boxes; 698 boxes.length = NBUF; 699 700 foreach(i; 0..NBUF) 701 { 702 boxes[i] = makeVec!box2i(); 703 } 704 705 foreach(j; 0..200) 706 { 707 foreach(i; 0..NBUF) 708 { 709 int previousSize = cast(int)(boxes[i].length); 710 void* previousPtr = boxes[i].ptr; 711 foreach(int k; 0..cast(int)(boxes[i].length)) 712 boxes[i][k] = box2i(k, k, k, k); 713 714 int newSize = uniform(0, 100, rng); 715 boxes[i].resize(newSize); 716 717 int minSize = previousSize; 718 if (minSize > boxes[i].length) 719 minSize = cast(int)(boxes[i].length); 720 721 void* newPtr = boxes[i].ptr; 722 foreach(int k; 0..minSize) 723 { 724 box2i item = boxes[i][k]; 725 box2i shouldBe = box2i(k, k, k, k); 726 assert(item == shouldBe); 727 } 728 729 int sum = 0; 730 foreach(k; 0..newSize) 731 { 732 box2i bb = boxes[i][k]; 733 sum += bb.a + bb.b + bb.c + bb.d; 734 } 735 } 736 } 737 738 foreach(i; 0..NBUF) 739 boxes[i].destroy(); 740 741 { 742 auto buf = makeVec!int; 743 enum N = 10; 744 buf.resize(N); 745 foreach(i ; 0..N) 746 buf[i] = i; 747 748 foreach(i ; 0..N) 749 assert(buf[i] == i); 750 751 auto buf2 = makeVec!int; 752 buf2.pushBack(11); 753 buf2.pushBack(14); 754 755 // test pushBack(slice) 756 buf.clearContents(); 757 buf.pushBack(buf2[]); 758 assert(buf[0] == 11); 759 assert(buf[1] == 14); 760 761 // test pushBack(slice) 762 buf2[1] = 8; 763 buf.clearContents(); 764 buf.pushBack(buf2); 765 assert(buf[0] == 11); 766 assert(buf[1] == 8); 767 } 768 } 769 770 // Vec should work without any initialization 771 unittest 772 { 773 
    Vec!string vec;

    // Iterating an uninitialized Vec must be a no-op (null data, zero size).
    foreach(e; vec[])
    {
    }

    assert(vec.length == 0);
    assert(vec.size == 0);
    assert(vec.slength == 0);
    assert(vec.ssize == 0);
    vec.clearContents();
    vec.resize(0);
    assert(vec == vec.init);
    vec.fill("filler"); // fills zero elements: must not allocate nor crash
    assert(vec.ptr is null);
}

// Issue #312: vec.opIndex not returning ref which break struct assignment
unittest
{
    static struct A
    {
        int x;
    }
    Vec!A vec = makeVec!A();
    A a;
    vec.pushBack(a);
    vec ~= a;
    vec[0].x = 42; // vec[0] needs to return a ref
    assert(vec[0].x == 42);
}
unittest // removeAndShiftRestOfArray was wrong
{
    Vec!int v;
    v.pushBack(14);
    v.pushBack(27);
    v.pushBack(38);
    assert(v.length == 3);
    v.removeAndShiftRestOfArray(1);
    assert(v.length == 2);
    assert(v[] == [14, 38]);
}
unittest // removeAndShiftRestOfArray with slice
{
    Vec!int v;
    v.pushBack([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
    v.removeAndShiftRestOfArray(3, 5);
    assert(v[] == [0, 1, 2, 5, 6, 7, 8, 9]);
    v.removeAndShiftRestOfArray(0, 4);
    assert(v[] == [6, 7, 8, 9]);
    v.removeAndShiftRestOfArray(2, 4);
    assert(v[] == [6, 7]);
    // Empty ranges (first == last) must leave the array untouched.
    v.removeAndShiftRestOfArray(2, 2);
    v.removeAndShiftRestOfArray(0, 0);
    v.removeAndShiftRestOfArray(1, 1);
    assert(v[] == [6, 7]);
}



/// Allows to merge the allocation of several arrays, which saves allocation count and can speed up things thanks to locality.
///
/// Example: see below unittest.
struct MergedAllocation
{
nothrow:
@nogc:

    // In debug mode, add stomp detection to `MergedAllocation`.
    // This takes additional complexity to coarse check for stomping nearby buffers.
    debug
    {
        private enum mergedAllocStompWarning = true;
    }
    else
    {
        private enum mergedAllocStompWarning = false;
    }


    // This adds 32-byte of sentinels around each allocation,
    // and check at the end of the program that they were unaffected.
    static if (mergedAllocStompWarning)
    {
        // Number of bytes to write between areas to check for stomping.
        enum SENTINEL_BYTES = 32;
    }

    enum maxExpectedAlignment = 32;

    /// Start defining the area of allocations.
    void start()
    {
        // Detect memory errors in former uses.
        static if (mergedAllocStompWarning)
        {
            checkSentinelAreas();
            _allocateWasCalled = false;
        }

        // Counting pass starts from address 0: after the layout calls,
        // `_base` as an integer is the total byte size needed.
        _base = cast(ubyte*)(cast(size_t)0);
    }

    /// Allocate (or count) space needed for `numElems` elements of type `T` with given alignment.
    /// This gets called twice for each array, see example for usage.
    ///
    /// This bumps the internal bump allocator.
    /// Giving null to this chain and converting the result to size_t give the total needed size
    /// for the merged allocation.
    ///
    /// Warning:
    ///          - If called after a `start()` call, the area returned are wrong and are only for
    ///            counting needed bytes. Don't use those.
    ///
    ///          - If called after an `allocate()` call, the area returned are an actual merged
    ///            allocation (if the same calls are done).
    ///
    /// Warning: Prefer `allocArray` over `alloc` variant, since the extra length field WILL help
    ///          you catch memory errors before release. Else it is very common to miss buffer
    ///          overflows in samplerate changes.
    void allocArray(T)(out T[] array, size_t numElems, size_t alignment = 1)
    {
        assert(alignment <= maxExpectedAlignment);
        assert( (alignment != 0) && ((alignment & (alignment - 1)) == 0)); // power of two

        size_t adr = cast(size_t) _base;

        // 1. Align base address
        size_t mask = ~(alignment - 1);
        adr = (adr + alignment - 1) & mask;

        // 2. Assign array and base.
        array = (cast(T*)adr)[0..numElems];
        adr += T.sizeof * numElems;
        _base = cast(ubyte*) adr;

        static if (mergedAllocStompWarning)
        {
            // Only write/record sentinels in the second (real) pass; during the
            // counting pass `_base` is a fake address. Note that `_base` is
            // advanced by SENTINEL_BYTES in both passes, so sizes agree.
            if (_allocateWasCalled && _allocation !is null)
            {
                // Each allocated area followed with SENTINEL_BYTES bytes of value 0xCC
                _base[0..SENTINEL_BYTES] = 0xCC;
                registerSentinel(_base);
            }
            _base += SENTINEL_BYTES;
        }
    }

    ///ditto
    void alloc(T)(out T* array, size_t numElems, size_t alignment = 1)
    {
        T[] arr;
        allocArray(arr, numElems, alignment);
        array = arr.ptr;
    }

    /// Allocate actual storage for the merged allocation. From there, you need to define exactly the same area with `alloc` and `allocArray`.
    /// This time they will get a proper value.
    void allocate()
    {
        static if (mergedAllocStompWarning) _allocateWasCalled = true;

        size_t sizeNeeded = cast(size_t)_base; // since it was fed 0 at start.

        if (sizeNeeded == 0)
        {
            // If no bytes are requested, it means no buffer were requested, or only with zero size.
            // We will return a null pointer in that case, since accessing them would be illegal anyway.
            _allocation = null;
        }
        else
        {
            // the merged allocation needs to have the largest expected alignment, else the size could depend on the hazards
            // of the allocation. With maximum alignment, padding is the same so long as areas have smaller or equal alignment requirements.
            _allocation = cast(ubyte*) _mm_realloc(_allocation, sizeNeeded, maxExpectedAlignment);
        }

        // So that the next layout call points to the right area.
        _base = _allocation;
    }

    ~this()
    {
        static if (mergedAllocStompWarning)
        {
            checkSentinelAreas();
        }

        if (_allocation != null)
        {
            _mm_free(_allocation);
            _allocation = null;
        }
    }

private:

    // Location of the allocation.
    ubyte* _allocation = null;

    // Bump pointer: fake (starts at 0) during the counting pass,
    // real (starts at _allocation) after `allocate()`.
    ubyte* _base = null;

    static if (mergedAllocStompWarning)
    {
        bool _allocateWasCalled = false;

        Vec!(ubyte*) _sentinels; // start of sentinel area (SENTINEL_BYTES long)

        void registerSentinel(ubyte* start)
        {
            _sentinels.pushBack(start);
        }

        bool hasMemoryError() // true if sentinel bytes stomped
        {
            assert(_allocateWasCalled && _allocation !is null);

            foreach(ubyte* s; _sentinels[])
            {
                for (int n = 0; n < 32; ++n) // 32 == SENTINEL_BYTES
                {
                    if (s[n] != 0xCC)
                        return true;
                }
            }
            return false;
        }

        // Check existing sentinels, and unregister them
        void checkSentinelAreas()
        {
            if (!_allocateWasCalled)
                return; // still haven't done an allocation, nothing to check.

            if (_allocation is null)
                return; // nothing to check

            // If you fail here, there is a memory error in your access patterns.
            // Sentinel bytes of value 0xCC were overwritten in a `MergedAllocation`.
            // You can use slices with `allocArray` instead of `alloc` to find the faulty
            // access. This check doesn't catch everything!
            assert(! hasMemoryError());

            _sentinels.clearContents();
        }
    }
}


// Here is how you should use MergedAllocation.
unittest
{
    static struct MyDSPStruct
    {
    public:
    nothrow:
    @nogc:
        void initialize(int maxFrames)
        {
            _mergedAlloc.start();
            layout(_mergedAlloc, maxFrames); // you need such a layout function to be called twice.
            _mergedAlloc.allocate();
            layout(_mergedAlloc, maxFrames); // the first time arrays area allocated in the `null` area, the second time in
                                             // actually allocated memory (since we now have the needed length).
        }

        void layout(ref MergedAllocation ma, int maxFrames)
        {
            // allocate `maxFrames` elems, and return a slice in `_intermediateBuf`.
            ma.allocArray(_intermediateBuf, maxFrames);

            // allocate `maxFrames` elems, aligned to 16-byte boundaries. Return a pointer to that in `_coeffs`.
            ma.alloc(_coeffs, maxFrames, 16);
        }

    private:
        float[] _intermediateBuf;
        double* _coeffs;
        MergedAllocation _mergedAlloc;
    }

    MyDSPStruct s;
    s.initialize(14);
    s._coeffs[0..14] = 1.0f;
    s._intermediateBuf[0..14] = 1.0f;
    s.initialize(17);
    s._coeffs[0..17] = 1.0f;
    s._intermediateBuf[0..17] = 1.0f;
}

// Should be valid to allocate nothing with a MergedAllocation.
unittest
{
    MergedAllocation ma;
    ma.start();
    ma.allocate();
    assert(ma._allocation == null);
}

// test stomping detection
unittest
{
    MergedAllocation ma;
    ma.start();
    ubyte[] arr, arr2;
    ma.allocArray(arr, 17);
    ma.allocArray(arr2, 24);
    assert(ma._allocation == null);

    ma.allocate();
    ma.allocArray(arr, 17);
    ma.allocArray(arr2, 24);
    assert(ma._allocation != null);
    assert(arr.length == 17);
    assert(arr2.length == 24);

    // Test memory error detection with a simple case.
    static if (MergedAllocation.mergedAllocStompWarning)
    {
        assert(!ma.hasMemoryError());

        // Create a memory error
        arr.ptr[18] = 2;
        assert(ma.hasMemoryError());
        arr.ptr[18] = 0xCC; // undo the error to avoid stopping the unittests
    }
}