1 /**
2 * Miscellaneous functions for dplug:canvas internals. 
3 *
4 * Copyright: Copyright Chris Jones 2020.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module dplug.canvas.misc;
8 
9 import core.stdc.stdlib : malloc, free, realloc;
10 public import inteli;
11 public import std.math : sqrt, abs;
12 
13 nothrow:
14 @nogc:
15 
version(LDC)
{
    import ldc.intrinsics;

    // NOTE(review): these alias names are swapped relative to the x86
    // mnemonics: llvm_ctlz counts *leading* zeros (bsr-like) and llvm_cttz
    // counts *trailing* zeros (bsf-like). Callers in this module use
    // intr_bsr expecting count-trailing-zeros behavior, so the file is
    // internally consistent — but the names are misleading; confirm before
    // reusing these aliases elsewhere.
    alias intr_bsf = llvm_ctlz;
    alias intr_bsr = llvm_cttz;
    alias fabs = llvm_fabs;        // DMD fabs sucks
}
else version(DigitalMars)
{
    import core.bitop;

    // Count trailing zeros of src (mirrors llvm_cttz, matching the LDC
    // branch's intr_bsr alias above).
    // isZeroUndefined must be true: bsf's result is undefined for src == 0,
    // and callers are expected to guarantee src != 0.
    T intr_bsr(T)(T src, bool isZeroUndefined)
    {
        assert(isZeroUndefined);
        return bsf(src); // Note: llvm_cttz corresponds to bsf in DMD not bsr
    }
}
34 
/// Returns the lesser of `a` and `b`; returns `b` when they compare equal.
T min(T)(T a, T b)
{
    if (a < b)
        return a;
    return b;
}
39 
/// Returns the greater of `a` and `b`; returns `b` when they compare equal.
T max(T)(T a, T b)
{
    if (a > b)
        return a;
    return b;
}
44 
/// Clamps `x` into the inclusive range [min, max].
/// (Assumes min <= max; no check is performed.)
T clip(T)(T x, T min, T max)
{
    return (x < min) ? min
         : (x > max) ? max
         : x;
}
51 
52 // round x up to next multiple of q
53 
/// Rounds `x` up to the next multiple of `q` (returns `x` unchanged when it
/// is already a multiple, including x == 0). `q` must be non-zero.
uint roundUpTo(uint x, uint q)
{
    immutable uint rem = x % q;
    if (rem == 0)
        return x;
    return x + (q - rem);
}
59 
// round x up to the next power of 2
/// Rounds `x` up to the next power of 2 (powers of 2 map to themselves).
/// Note: wraps for inputs above 2^31, and 0 maps to 0.
uint roundUpPow2(uint x)
{
    // Smear the highest set bit of (x-1) into every lower position,
    // producing a mask of all ones below the next power of two, then add 1.
    uint v = x - 1;
    static foreach (shift; [1, 2, 4, 8, 16])
        v |= v >> shift;
    return v + 1;
}
72 
/// Rounds `x` up to the next power of 2 (64-bit overload).
/// Powers of 2 map to themselves; 0 maps to 0; wraps above 2^63.
ulong roundUpPow2(ulong x)
{
    // Same bit-smear trick as the 32-bit overload, with one extra step
    // to cover all 64 bits.
    ulong v = x - 1;
    static foreach (shift; [1, 2, 4, 8, 16, 32])
        v |= v >> shift;
    return v + 1;
}
84 
85 // is power of 2
86 
/// True when `x` has at most one bit set.
/// Note: this deliberately (?) returns true for x == 0 as well — callers
/// relying on the original behavior get the same result here.
bool isPow2(int x)
{
    // Clearing the lowest set bit (x & (x-1)) leaves zero iff at most
    // one bit was set.
    return (x & (x - 1)) == 0;
}
91 
92 /*
93   broadcast alpha
94 
95   x is [A2,R2,G2,B2,A1,R1,G1,B1], 16 bit components with lower 8 bits used
96   returns [A2,A2,A2,A2,A1,A1,A1,A1], 16 bits used
97   two versions, shuffleVector should lower to pshufb, but it is a bit slower on
98   my CPU, maybe from increased register pressure?
99 */
100 
// Broadcasts each pixel's alpha across its four 16-bit lanes.
// Per the comment above: x = [A2,R2,G2,B2,A1,R1,G1,B1] (16-bit lanes, low
// 8 bits significant) -> [A2,A2,A2,A2,A1,A1,A1,A1] with 16 bits used.
__m128i _mm_broadcast_alpha(__m128i x)
{
    // 255 = 0b11_11_11_11: replicate lane 3 of each 64-bit half —
    // shufflelo handles the low pixel, shufflehi the high pixel.
    x = _mm_shufflelo_epi16!255(x);
    x = _mm_shufflehi_epi16!255(x);
    // Shift into the high byte of each lane so all 16 bits carry alpha
    // (presumably scaling 8-bit alpha to 16-bit range — confirm with callers).
    return _mm_slli_epi16(x,8);
}
107 
108 // Used for clamping 4 LUT indices to valid values.
// Clamps each of the four signed 32-bit lanes of `v` into [0, max] using
// only SSE2 (avoids _mm_max_epi32, which requires SSE4.1).
// NOTE(review): _mm_packs_epi32 saturates to short range, so lanes outside
// [-32768, 32767] clamp via saturation first — `max` is assumed to be a
// small non-negative LUT bound; confirm at call sites.
__m128i _mm_clamp_0_to_N_epi32(__m128i v, short max)
{
    // turn into shorts to be able to use min and max functions
    // this preserve signedness
    // _mm_max_epi32 exists but in SSE4.1
    v = _mm_packs_epi32(v, _mm_setzero_si128());

    // Clip to zero if negative
    v = _mm_max_epi16(v, _mm_setzero_si128());

    // Clip to max if above
    v = _mm_min_epi16(v, _mm_set1_epi16(max));

    // Expand back to 32-bit (zero-extension is fine: values are now >= 0)
    return _mm_unpacklo_epi16(v, _mm_setzero_si128());
}
125 
126 /*
127   nextSetBit, searches the bit mask for the next set bit. 
128 
129   mask  - array that holds the bits
130   start - start position
131   end   - end position
132 
133   returns : index of next set bit, or "end" if none found
134 
135   note the mask should be long enough in the given word size to hold
136   the bits, IE. If end = 65, then the uint mask should be 3 uints,
137   the ulong mask should be 2 ulongs. If end = 64, then it only
138   need be 2 uints or 1 ulong.
139 */
140 
/// Finds the index of the next set bit in `mask` at or after `start`,
/// clamped to `end`; returns `end` when no set bit is found.
/// `mask` must span at least (end+63)/64 ulongs (see module comment above).
int nextSetBit(ulong* mask, int start, int end)
{
    assert((start >= 0) && (start < end));

    int word = start >> 6;
    // Bits of the first word at or above the start position.
    immutable ulong head = mask[word] >> (start & 63);

    if (head != 0)
    {
        immutable int found = start + cast(int) intr_bsr(head, true);
        return (found > end) ? end : found;
    }

    // First word exhausted: scan the remaining whole words.
    immutable int wordCount = (end + 63) >> 6;
    for (word = word + 1; word < wordCount; word++)
    {
        if (mask[word] == 0)
            continue;
        immutable int found = word * 64 + cast(int) intr_bsr(mask[word], true);
        return (found > end) ? end : found;
    }
    return end;
}
169 
/// Finds the index of the next set bit in `mask` at or after `start`,
/// clamped to `end`; returns `end` when no set bit is found.
/// `mask` must span at least (end+31)/32 uints (see module comment above).
int nextSetBit(uint* mask, int start, int end)
{
    assert((start >= 0) && (start < end));

    int word = start >> 5;
    // Bits of the first word at or above the start position.
    immutable uint head = mask[word] >> (start & 31);

    if (head != 0)
    {
        immutable int found = start + cast(int) intr_bsr(head, true);
        return (found > end) ? end : found;
    }

    // First word exhausted: scan the remaining whole words.
    immutable int wordCount = (end + 31) >> 5;
    for (word = word + 1; word < wordCount; word++)
    {
        if (mask[word] == 0)
            continue;
        immutable int found = word * 32 + cast(int) intr_bsr(mask[word], true);
        return (found > end) ? end : found;
    }
    return end;
}
198 
199 /*
  Arena Allocator, very fast allocation, free all memory at once. Essentially
  it is a linked list of memory blocks and allocation is sequential through
  each block and on to the next. If it runs out of blocks it allocates and
  adds a new one to the end of the linked list. Reset() resets the allocator
  to the beginning of the first block. Nothing is freed back to the C allocator
  until the destructor is called. No init or clean up is done of the memory.
206 */
207 
struct ArenaAllocator(T, uint blockSize)
{  
nothrow:
@nogc:
    // One fixed-size node in the singly linked chain of storage blocks.
    struct EABlock
    {
        EABlock* next;
        T[blockSize] items;
    }

    EABlock* m_root;        // first block; owns the whole chain
    EABlock* m_block;       // block currently being carved up
    uint m_pos = uint.max;  // next free index within m_block

    // note: m_pos is set to uint.max if no blocks are allocated yet. This avoids
    // having to do two conditional tests in the fast path of allocate() method.

public:

    /* Frees every block back to the C allocator.
       NOTE(review): copying is not disabled, so a copy of this struct would
       double-free the chain in its destructor — avoid copies. */
    ~this()
    {
        while (m_root)
        {
            EABlock* tmp = m_root;
            m_root = m_root.next;
            free(tmp);
        }
    }

    /* Returns a pointer to one uninitialized T. Never returns null:
       aborts via assert(0) on out-of-memory.
       Fast path is a single bounds check; the slow path advances to the
       next block, reusing blocks left over from reset() before growing. */
    T* allocate()
    {
        if (m_pos < blockSize)
        {
            return &m_block.items[m_pos++];
        }

        if (m_block && m_block.next)
        {
            // A block from a previous reset() cycle is available — reuse it.
            m_block = m_block.next;
        }
        else
        {
            // Grow the chain (also handles the very first allocation,
            // where m_root is still null).
            EABlock* fresh = newBlock();
            if (m_block)
                m_block.next = fresh;
            else
                m_root = fresh;
            m_block = fresh;
        }
        m_pos = 0;
        return &m_block.items[m_pos++];
    }

    /* Rewinds allocation to the start of the first block. No memory is
       released; existing blocks are reused by subsequent allocate() calls. */
    void reset()
    {
        m_block = m_root;
        m_pos = (m_root) ? 0 : uint.max;
    }

private:

    // Allocates one detached (next == null) block from the C heap;
    // aborts on out-of-memory. Factored out of allocate(), which previously
    // duplicated this malloc/check/init sequence in two branches.
    EABlock* newBlock()
    {
        void* tmp = malloc(EABlock.sizeof);
        if (!tmp) assert(0); // no mem abandon ship!
        EABlock* b = cast(EABlock*) tmp;
        b.next = null;
        return b;
    }
}