/**
 * Miscellaneous functions for dplug:canvas internals.
 *
 * Copyright: Copyright Chris Jones 2020.
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 */
module dplug.canvas.misc;

import core.stdc.stdlib : malloc, free, realloc;
public import inteli;
public import std.math : sqrt, abs;

nothrow:
@nogc:

version(LDC)
{
    import ldc.intrinsics;

    // Note: the alias names are swapped relative to the x86 mnemonics.
    // intr_bsr resolves to count-trailing-zeros (bsf-like behaviour), which is
    // what nextSetBit below relies on; the DMD fallback matches it by calling bsf.
    alias intr_bsf = llvm_ctlz;
    alias intr_bsr = llvm_cttz;
    alias fabs = llvm_fabs; // DMD fabs sucks
}
else version(DigitalMars)
{
    import core.bitop;

    T intr_bsr(T)(T src, bool isZeroUndefined)
    {
        assert(isZeroUndefined);
        return bsf(src); // Note: llvm_cttz corresponds to bsf in DMD, not bsr
    }
}

T min(T)(T a, T b)
{
    return (a < b) ? a : b;
}

T max(T)(T a, T b)
{
    return (a > b) ? a : b;
}

T clip(T)(T x, T min, T max)
{
    if (x < min) return min;
    if (x > max) return max;
    return x;
}

// round x up to the next multiple of q

uint roundUpTo(uint x, uint q)
{
    uint tmp = x % q;
    return (tmp) ? x - tmp + q : x;
}

// round x up to the next power of 2

uint roundUpPow2(uint x)
{
    x--;
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    return x+1;
}

ulong roundUpPow2(ulong x)
{
    x--;
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    x |= x >> 32;
    return x+1;
}

// is x a power of 2?

bool isPow2(int x)
{
    return ! ((x - 1) & x);
}

/*
  broadcast alpha

  x is [A2,R2,G2,B2,A1,R1,G1,B1], 16 bit components with the lower 8 bits used
  returns [A2,A2,A2,A2,A1,A1,A1,A1], 16 bits used
  Two versions were tried: shuffleVector should lower to pshufb, but it is a bit
  slower on my CPU, maybe from increased register pressure?
*/

__m128i _mm_broadcast_alpha(__m128i x)
{
    x = _mm_shufflelo_epi16!255(x);
    x = _mm_shufflehi_epi16!255(x);
    return _mm_slli_epi16(x,8);
}

// Used for clamping 4 LUT indices to valid values.
__m128i _mm_clamp_0_to_N_epi32(__m128i v, short max)
{
    // pack into shorts to be able to use the SSE2 min and max functions;
    // this preserves signedness
    // (_mm_max_epi32 exists, but only in SSE4.1)
    v = _mm_packs_epi32(v, _mm_setzero_si128());

    // Clip to zero if negative
    v = _mm_max_epi16(v, _mm_setzero_si128());

    // Clip to max if above
    v = _mm_min_epi16(v, _mm_set1_epi16(max));

    // Expand back to 32-bit
    return _mm_unpacklo_epi16(v, _mm_setzero_si128());
}
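/*
    Illustrative sketch (editor's addition, not part of the original module):
    a unittest showing the intended effect of _mm_broadcast_alpha on one sample
    pair of pixels. Assumes inteli provides the SSE2 helpers _mm_setr_epi16 and
    _mm_storeu_si128 with Intel semantics.
*/
unittest
{
    // word order is low-to-high: [B1,G1,R1,A1, B2,G2,R2,A2], alphas 0x40 and 0x55
    __m128i px = _mm_setr_epi16(0x10, 0x20, 0x30, 0x40,
                                0x50, 0x60, 0x70, 0x55);

    __m128i a = _mm_broadcast_alpha(px);

    short[8] res;
    _mm_storeu_si128(cast(__m128i*) res.ptr, a);

    // each lane now holds its pixel's alpha shifted into the high byte
    foreach (i; 0 .. 4) assert(res[i] == 0x4000);
    foreach (i; 4 .. 8) assert(res[i] == 0x5500);
}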
/*
  nextSetBit searches the bit mask for the next set bit.

  mask  - array that holds the bits
  start - start position
  end   - end position

  returns: index of the next set bit, or "end" if none is found

  Note: the mask must be long enough in the given word size to hold the bits,
  i.e. if end = 65 the uint mask needs 3 uints and the ulong mask needs
  2 ulongs; if end = 64 it need only be 2 uints or 1 ulong.
*/

int nextSetBit(ulong* mask, int start, int end)
{
    assert((start >= 0) && (start < end));

    int nsb = start;
    int idx = nsb>>6;
    ulong bits = mask[idx] >> (nsb & 63);

    if (bits == 0)
    {
        idx++;
        int msklen = (end+63)>>6;
        while (idx < msklen)
        {
            if (mask[idx] != 0)
            {
                nsb = idx*64 + cast(int) intr_bsr(mask[idx],true);
                if (nsb > end) nsb = end;
                return nsb;
            }
            idx++;
        }
        return end;
    }
    nsb = nsb + cast(int) intr_bsr(bits,true);
    if (nsb > end) nsb = end;
    return nsb;
}

int nextSetBit(uint* mask, int start, int end)
{
    assert((start >= 0) && (start < end));

    int nsb = start;
    int idx = nsb>>5;
    uint bits = mask[idx] >> (nsb & 31);

    if (bits == 0)
    {
        idx++;
        int msklen = (end+31)>>5;
        while (idx < msklen)
        {
            if (mask[idx] != 0)
            {
                nsb = idx*32 + cast(int) intr_bsr(mask[idx],true);
                if (nsb > end) nsb = end;
                return nsb;
            }
            idx++;
        }
        return end;
    }
    nsb = nsb + cast(int) intr_bsr(bits,true);
    if (nsb > end) nsb = end;
    return nsb;
}

/*
  Arena allocator: very fast allocation, free all memory at once. Essentially
  it is a linked list of memory blocks, and allocation proceeds sequentially
  through each block and on to the next. If it runs out of blocks it allocates
  a new one and adds it to the end of the linked list. reset() rewinds the
  allocator to the beginning of the first block. Nothing is freed back to the
  C allocator until the destructor is called. The memory is neither initialised
  nor cleaned up.
*/

struct ArenaAllocator(T, uint blockSize)
{
nothrow:
@nogc:

    struct EABlock
    {
        EABlock* next;
        T[blockSize] items;
    }

    EABlock* m_root;
    EABlock* m_block;
    uint m_pos = uint.max;

    // note: m_pos is set to uint.max if no blocks are allocated yet. This avoids
    // having to do two conditional tests in the fast path of the allocate() method.

public:

    ~this()
    {
        while (m_root)
        {
            EABlock* tmp = m_root;
            m_root = m_root.next;
            free(tmp);
        }
    }

    T* allocate()
    {
        if (m_pos < blockSize)
        {
            return &m_block.items[m_pos++];
        }
        else
        {
            if (m_block)
            {
                if (m_block.next)
                {
                    m_block = m_block.next;
                    m_pos = 0;
                    return &m_block.items[m_pos++];
                }
                else
                {
                    void* tmp = malloc(EABlock.sizeof);
                    if (!tmp) assert(0); // no mem, abandon ship!
                    m_block.next = cast(EABlock*) tmp;
                    m_block = m_block.next;
                    m_block.next = null;
                    m_pos = 0;
                    return &m_block.items[m_pos++];
                }
            }
            else
            {
                void* tmp = malloc(EABlock.sizeof);
                if (!tmp) assert(0); // no mem, abandon ship!
                m_root = cast(EABlock*) tmp;
                m_block = m_root;
                m_block.next = null;
                m_pos = 0;
                return &m_block.items[m_pos++];
            }
        }
    }

    void reset()
    {
        m_block = m_root;
        m_pos = (m_root) ? 0 : uint.max;
    }
}
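/*
    Illustrative sketch (editor's addition, not part of the original module):
    minimal ArenaAllocator usage with a deliberately small block size so the
    allocator has to chain extra blocks, then reset() reuses the existing
    blocks without touching the C heap again.
*/
unittest
{
    ArenaAllocator!(int, 4) arena;

    // 10 allocations with blockSize = 4 forces the arena onto a third block
    foreach (i; 0 .. 10)
    {
        int* p = arena.allocate();
        *p = i;
    }

    // rewind to the start of the first block; the blocks stay allocated and
    // are only returned to the C allocator when the destructor runs
    arena.reset();
    int* q = arena.allocate();
    *q = 42;
    assert(*q == 42);
} // arena's destructor frees all blocks here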