1 //          Copyright Jernej Krempuš 2012
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 module dplug.fft.pfft;
7 import core.stdc.stdlib;
8 import core.stdc.string: memcpy;
9 import core.exception,
10        core.bitop,
11        std.array;
12 
13 template Import(TT)
14 {
15     static if(is(TT == float))
16         import impl = dplug.fft.impl_float;
17     else static if(is(TT == double))
18         import impl = dplug.fft.impl_double;
19     else
20         static assert(0, "Not implemented");
21 }
22 
23 template st(alias a){ enum st = cast(size_t) a; }
24 
25 /**
26 A class for calculating discrete fourier transform. The methods of this class
27 use split format for complex data. This means that a complex data set is
28 represented as two arrays - one for the real part and one for the imaginary
29 part. An instance of this class can only be used for transforms of one
30 particular size. The template parameter is the floating point type that the
31 methods of the class will operate on.
32 
33 Example:
34 ---
35 import std.stdio, std.conv, std.exception;
36 import pfft.pfft;
37 
38 void main(string[] args)
39 {
40     auto n = to!int(args[1]);
41     enforce((n & (n-1)) == 0, "N must be a power of two.");
42 
43     alias Fft!float F;
44 
45     F f;
46     f.initialize(n);
47 
48     auto re = F.allocate(n);
49     auto im = F.allocate(n);
50 
51     foreach(i, _; re)
52         readf("%s %s\n", &re[i], &im[i]);
53 
54     f.fft(re, im);
55 
56     foreach(i, _; re)
57         writefln("%s\t%s", re[i], im[i]);
58 }
59 ---
60  */
61 struct Fft(T)
62 {
63 public:
64 nothrow:
65 @nogc:
66     mixin Import!T;
67 
68     int log2n;
69     impl.Table table;
70 
71 /**
72 The Fft constructor. The parameter is the size of data sets that $(D fft) and
73 $(D ifft) will operate on. I will refer to this number as n in the rest of the
74 documentation for this class.Tables used in fft and ifft are calculated in the
75 constructor.
76  */
77     void initialize(size_t n)
78     {
79         assert((n & (n - 1)) == 0);
80         log2n  = bsf(n);
81         auto mem = alignedRealloc(table, impl.table_size_bytes(log2n), 64);
82         table = impl.fft_table(log2n, mem);
83         assert(mem == table);
84     }
85 
86     ~this()
87     {
88         alignedFree(table, 64);
89     }
90 
91 /**
92 Calculates discrete fourier transform. $(D_PARAM re) should contain the real
93 part of the data and $(D_PARAM im) the imaginary part of the data. The method
94 operates in place - the result is saved back to $(D_PARAM re) and $(D_PARAM im).
95 Both arrays must be properly aligned - to obtain a properly aligned array you can
96 use $(D allocate).
97  */
98     void fft(T[] re, T[] im)
99     {
100         assert(re.length == im.length);
101         assert(re.length == (st!1 << log2n));
102         assert(((impl.alignment(re.length) - 1) & cast(size_t) re.ptr) == 0);
103         assert(((impl.alignment(im.length) - 1) & cast(size_t) im.ptr) == 0);
104 
105         impl.fft(re.ptr, im.ptr, log2n, table);
106     }
107 
108 /**
109 Calculates inverse discrete fourier transform scaled by n. The arguments have
110 the same role as they do in $(D fft).
111  */
112     void ifft(T[] re, T[] im)
113     {
114         fft(im, re);
115     }
116 
117 /**
118     Returns requited alignment for use with $(D fft), $(D ifft) and
119     $(D scale) methods.
120  */
121     static size_t alignment(size_t n)
122     {
123         return impl.alignment(n);
124     }
125 
126 /**
127 Allocates an array that is aligned properly for use with $(D fft), $(D ifft) and
128 $(D scale) methods.
129  */
130     static T[] allocate(size_t n)
131     {
132         size_t bytes = n * T.sizeof;
133         T* r = cast(T*) alignedMalloc(bytes, alignment(bytes));
134         assert(((impl.alignment(bytes) - 1) & cast(size_t) r) == 0);
135         return r[0..n];
136     }
137 
138 /**
139 Deallocates an array allocated with `allocate`.
140 */
141     static void deallocate(T[] arr)
142     {
143         size_t n = arr.length;
144         alignedFree(arr.ptr, alignment(n));
145     }
146 
147 
148 /**
149 Scales an array data by factor k. The array must be properly aligned. To obtain
150 a properly aligned array, use $(D allocate).
151  */
152     static void scale(T[] data, T k)
153     {
154         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
155         impl.scale(data.ptr, data.length, k);
156     }
157 
158 }
159 
160 /**
161 A class for calculating real discrete fourier transform. The methods of this
162 class use split format for complex data. This means that complex data set is
163 represented as two arrays - one for the real part and one for the imaginary
164 part. An instance of this class can only be used for transforms of one
165 particular size. The template parameter is the floating point type that the
166 methods of the class will operate on.
167 
168 Example:
169 ---
170 import std.stdio, std.conv, std.exception;
171 import pfft.pfft;
172 
173 void main(string[] args)
174 {
175     auto n = to!int(args[1]);
176     enforce((n & (n-1)) == 0, "N must be a power of two.");
177 
178     alias Rfft!float F;
179 
180     F f;
181     f.initialize(n);
182 
183     auto data = F.allocate(n);
184 
185     foreach(ref e; data)
186         readf("%s\n", &e);
187 
188     f.rfft(data);
189 
190     foreach(i; 0 .. n / 2 + 1)
191         writefln("%s\t%s", data[i], (i == 0 || i == n / 2) ? 0 : data[i]);
192 }
193 ---
194  */
195 struct Rfft(T)
196 {
197 public:
198 nothrow:
199 @nogc:
200     mixin Import!T;
201 
202     int log2n;
203     Fft!T _complex;
204     impl.RTable rtable;
205     impl.ITable itable;
206 
207 /**
208 The Rfft constructor. The parameter is the size of data sets that $(D rfft) will
209 operate on. I will refer to this number as n in the rest of the documentation
210 for this class. All tables used in rfft are calculated in the constructor.
211  */
212     void initialize(size_t n)
213     {
214         // Doesn't work with lower size, but I'm unable to understand why
215         assert(n >= 128); 
216 
217         assert((n & (n - 1)) == 0);
218         log2n  = bsf(n);
219 
220         _complex.initialize(n / 2);
221 
222         auto mem = alignedRealloc(rtable, impl.rtable_size_bytes(log2n), 64);
223         rtable = impl.rfft_table(log2n, mem);
224         assert(mem == rtable);
225 
226         mem = alignedRealloc(itable, impl.itable_size_bytes(log2n), 64);
227         itable = impl.interleave_table(log2n, mem);
228         assert(mem == itable);
229     }
230 
231     ~this()
232     {
233         alignedFree(rtable, 64);
234         alignedFree(itable, 64);
235     }
236 
237 /**
238 Calculates discrete fourier transform of the real valued sequence in data.
239 The method operates in place. When the method completes, data contains the
240 result. First $(I n / 2 + 1) elements contain the real part of the result and
241 the rest contains the imaginary part. Imaginary parts at position 0 and
242 $(I n / 2) are known to be equal to 0 and are not stored, so the content of
243 data looks like this:
244 
245  $(D r(0), r(1), ... r(n / 2), i(1), i(2), ... i(n / 2 - 1))
246 
247 
248 The elements of the result at position greater than n / 2 can be trivially
249 calculated from the relation $(I DFT(f)[i] = DFT(f)[n - i]*) that holds
250 because the input sequence is real.
251 
252 
253 The length of the array must be equal to n and the array must be properly
254 aligned. To obtain a properly aligned array you can use $(D allocate).
255  */
256     void rfft(T[] data)
257     {
258         assert(data.length == (st!1 << log2n));
259         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
260 
261         impl.deinterleave(data.ptr, log2n, itable);
262         impl.rfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable);
263     }
264 
265 /**
266 Calculates the inverse of $(D rfft), scaled by n (You can use $(D scale)
267 to normalize the result). Before the method is called, data should contain a
268 complex sequence in the same format as the result of $(D rfft). It is
269 assumed that the input sequence is a discrete fourier transform of a real
270 valued sequence, so the elements of the input sequence not stored in data
271 can be calculated from $(I DFT(f)[i] = DFT(f)[n - i]*). When the method
272 completes, the array contains the real part of the inverse discrete fourier
273 transform. The imaginary part is known to be equal to 0.
274 
275 The length of the array must be equal to n and the array must be properly
276 aligned. To obtain a properly aligned array you can use $(D allocate).
277  */
278     void irfft(T[] data)
279     {
280         assert(data.length == (st!1 << log2n));
281         assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0);
282 
283         impl.irfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable);
284         impl.interleave(data.ptr, log2n, itable);
285     }
286 
287 /// An alias for Fft!T.allocate
288     alias Fft!(T).allocate allocate;
289 
290 /// An alias for Fft!T.deallocate
291     alias Fft!(T).deallocate deallocate;
292 
293 /// An alias for Fft!T.scale
294     alias Fft!(T).scale scale;
295 
296     /// An alias for Fft!T.alignment
297     alias Fft!(T).alignment alignment;
298 
299     @property complex(){ return _complex; }
300 }
301 
302 
303 private:
304 
305 /// Returns: `true` if the pointer is suitably aligned.
306 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc
307 {
308     assert(alignment != 0);
309     return ( cast(size_t)p & (alignment - 1) ) == 0;
310 }
311 
312 /// Allocates an aligned memory chunk.
313 /// Functionally equivalent to Visual C++ _aligned_malloc.
314 /// Do not mix allocations with different alignment.
315 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc
316 {
317     assert(alignment != 0);
318 
319     // Short-cut and use the C allocator to avoid overhead if no alignment
320     if (alignment == 1)
321     {
322         // C99: 
323         // Implementation-defined behavior
324         // Whether the calloc, malloc, and realloc functions return a null pointer 
325         // or a pointer to an allocated object when the size requested is zero.
326         void* res = malloc(size);
327         if (size == 0)
328             return null;
329     }
330 
331     if (size == 0)
332         return null;
333 
334     size_t request = requestedSize(size, alignment);
335     void* raw = malloc(request);
336 
337     if (request > 0 && raw == null) // malloc(0) can validly return anything
338         onOutOfMemoryError();
339 
340     return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment);
341 }
342 
343 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc.
344 /// Functionally equivalent to Visual C++ _aligned_free.
345 /// Do not mix allocations with different alignment.
346 void alignedFree(void* aligned, size_t alignment) nothrow @nogc
347 {
348     assert(alignment != 0);
349 
350     // Short-cut and use the C allocator to avoid overhead if no alignment
351     if (alignment == 1)
352         return free(aligned);
353 
354     // support for free(NULL)
355     if (aligned is null)
356         return;
357 
358     void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof);
359     free(*rawLocation);
360 }
361 
362 /// Returns: next pointer aligned with alignment bytes.
363 void* nextAlignedPointer(void* start, size_t alignment) pure nothrow @nogc
364 {
365     return cast(void*)nextMultipleOf(cast(size_t)(start), alignment);
366 }
367 
368 // Returns number of bytes to actually allocate when asking
369 // for a particular alignement
370 @nogc size_t requestedSize(size_t askedSize, size_t alignment) pure nothrow
371 {
372     enum size_t pointerSize = size_t.sizeof;
373     return askedSize + alignment - 1 + pointerSize * 2;
374 }
375 
376 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it)
377 @nogc void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment) nothrow
378 {
379     enum size_t pointerSize = size_t.sizeof;
380     char* start = cast(char*)raw + pointerSize * 2;
381     void* aligned = nextAlignedPointer(start, alignment);
382     void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize);
383     *rawLocation = raw;
384     size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize);
385     *sizeLocation = size;
386     return aligned;
387 }
388 
389 // Returns: x, multiple of powerOfTwo, so that x >= n.
390 @nogc size_t nextMultipleOf(size_t n, size_t powerOfTwo) pure nothrow
391 {
392     // check power-of-two
393     assert( (powerOfTwo != 0) && ((powerOfTwo & (powerOfTwo - 1)) == 0));
394 
395     size_t mask = ~(powerOfTwo - 1);
396     return (n + powerOfTwo - 1) & mask;
397 }
398 
399 /// Reallocates an aligned memory chunk allocated by alignedMalloc or alignedRealloc.
400 /// Functionally equivalent to Visual C++ _aligned_realloc.
401 /// Do not mix allocations with different alignment.
402 @nogc void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow
403 {
404     assert(isPointerAligned(aligned, alignment));
405 
406     // If you fail here, it can mean you've used an uninitialized AlignedBuffer.
407     assert(alignment != 0);
408 
409     // Short-cut and use the C allocator to avoid overhead if no alignment
410     if (alignment == 1)
411     {
412         void* res = realloc(aligned, size);
413 
414         // C99: 
415         // Implementation-defined behavior
416         // Whether the calloc, malloc, and realloc functions return a null pointer 
417         // or a pointer to an allocated object when the size requested is zero.
418         if (size == 0)
419             return null;
420 
421         return res;
422     }
423 
424     if (aligned is null)
425         return alignedMalloc(size, alignment);
426 
427     if (size == 0)
428     {
429         alignedFree(aligned, alignment);
430         return null;
431     }
432 
433     size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2);
434 
435 
436     void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof);
437     size_t request = requestedSize(size, alignment);
438 
439     // Heuristic: if a requested size is within 50% to 100% of what is already allocated
440     //            then exit with the same pointer
441     if ( (previousSize < request * 4) && (request <= previousSize) )
442         return aligned;
443 
444     void* newRaw = malloc(request);
445     static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014
446     {
447         if (request > 0 && newRaw == null) // realloc(0) can validly return anything
448             onOutOfMemoryError();
449     }
450 
451     void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, request, alignment);
452     size_t minSize = size < previousSize ? size : previousSize;
453     memcpy(newAligned, aligned, minSize); // memcpy OK
454 
455     // Free previous data
456     alignedFree(aligned, alignment);
457     isPointerAligned(newAligned, alignment);
458     return newAligned;
459 }
460 
461 
462 unittest
463 {
464     {
465         int n = 16;
466         Fft!float A;
467         A.initialize(n);
468         float[] re = A.allocate(n);
469         float[] im = A.allocate(n);
470         scope(exit) A.deallocate(re);
471         scope(exit) A.deallocate(im);
472         re[] = 1.0f;
473         im[] = 0.0f;
474         A.fft(re, im);
475         A.ifft(re, im);
476     }
477 
478     {
479         int n = 128;
480         Rfft!float B;
481         B.initialize(n);
482         float[] data = B.allocate(n);
483         scope(exit) B.deallocate(data);
484         data[] = 1.0f;
485         B.rfft(data);
486         B.irfft(data);
487     }
488 }