1 // Copyright Jernej Krempuš 2012 2 // Distributed under the Boost Software License, Version 1.0. 3 // (See accompanying file LICENSE_1_0.txt or copy at 4 // http://www.boost.org/LICENSE_1_0.txt) 5 6 module dplug.fft.pfft; 7 import core.stdc.stdlib; 8 import core.stdc.string: memcpy; 9 import core.exception, 10 core.bitop, 11 std.array; 12 13 template Import(TT) 14 { 15 static if(is(TT == float)) 16 import impl = dplug.fft.impl_float; 17 else static if(is(TT == double)) 18 import impl = dplug.fft.impl_double; 19 else 20 static assert(0, "Not implemented"); 21 } 22 23 template st(alias a){ enum st = cast(size_t) a; } 24 25 /** 26 A class for calculating discrete fourier transform. The methods of this class 27 use split format for complex data. This means that a complex data set is 28 represented as two arrays - one for the real part and one for the imaginary 29 part. An instance of this class can only be used for transforms of one 30 particular size. The template parameter is the floating point type that the 31 methods of the class will operate on. 32 33 Example: 34 --- 35 import std.stdio, std.conv, std.exception; 36 import pfft.pfft; 37 38 void main(string[] args) 39 { 40 auto n = to!int(args[1]); 41 enforce((n & (n-1)) == 0, "N must be a power of two."); 42 43 alias Fft!float F; 44 45 F f; 46 f.initialize(n); 47 48 auto re = F.allocate(n); 49 auto im = F.allocate(n); 50 51 foreach(i, _; re) 52 readf("%s %s\n", &re[i], &im[i]); 53 54 f.fft(re, im); 55 56 foreach(i, _; re) 57 writefln("%s\t%s", re[i], im[i]); 58 } 59 --- 60 */ 61 struct Fft(T) 62 { 63 public: 64 nothrow: 65 @nogc: 66 mixin Import!T; 67 68 int log2n; 69 impl.Table table; 70 71 /** 72 The Fft constructor. The parameter is the size of data sets that $(D fft) and 73 $(D ifft) will operate on. I will refer to this number as n in the rest of the 74 documentation for this class.Tables used in fft and ifft are calculated in the 75 constructor. 76 */ 77 void initialize(size_t n) 78 { 79 assert((n & (n - 1)) == 0); 80 log2n = bsf(n); 81 auto mem = alignedRealloc(table, impl.table_size_bytes(log2n), 64); 82 table = impl.fft_table(log2n, mem); 83 assert(mem == table); 84 } 85 86 ~this() 87 { 88 alignedFree(table, 64); 89 } 90 91 /** 92 Calculates discrete fourier transform. $(D_PARAM re) should contain the real 93 part of the data and $(D_PARAM im) the imaginary part of the data. The method 94 operates in place - the result is saved back to $(D_PARAM re) and $(D_PARAM im). 95 Both arrays must be properly aligned - to obtain a properly aligned array you can 96 use $(D allocate). 97 */ 98 void fft(T[] re, T[] im) 99 { 100 assert(re.length == im.length); 101 assert(re.length == (st!1 << log2n)); 102 assert(((impl.alignment(re.length) - 1) & cast(size_t) re.ptr) == 0); 103 assert(((impl.alignment(im.length) - 1) & cast(size_t) im.ptr) == 0); 104 105 impl.fft(re.ptr, im.ptr, log2n, table); 106 } 107 108 /** 109 Calculates inverse discrete fourier transform scaled by n. The arguments have 110 the same role as they do in $(D fft). 111 */ 112 void ifft(T[] re, T[] im) 113 { 114 fft(im, re); 115 } 116 117 /** 118 Returns requited alignment for use with $(D fft), $(D ifft) and 119 $(D scale) methods. 120 */ 121 static size_t alignment(size_t n) 122 { 123 return impl.alignment(n); 124 } 125 126 /** 127 Allocates an array that is aligned properly for use with $(D fft), $(D ifft) and 128 $(D scale) methods. 129 */ 130 static T[] allocate(size_t n) 131 { 132 size_t bytes = n * T.sizeof; 133 T* r = cast(T*) alignedMalloc(bytes, alignment(bytes)); 134 assert(((impl.alignment(bytes) - 1) & cast(size_t) r) == 0); 135 return r[0..n]; 136 } 137 138 /** 139 Deallocates an array allocated with `allocate`. 140 */ 141 static void deallocate(T[] arr) 142 { 143 size_t n = arr.length; 144 alignedFree(arr.ptr, alignment(n)); 145 } 146 147 148 /** 149 Scales an array data by factor k. The array must be properly aligned. To obtain 150 a properly aligned array, use $(D allocate). 151 */ 152 static void scale(T[] data, T k) 153 { 154 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 155 impl.scale(data.ptr, data.length, k); 156 } 157 158 } 159 160 /** 161 A class for calculating real discrete fourier transform. The methods of this 162 class use split format for complex data. This means that complex data set is 163 represented as two arrays - one for the real part and one for the imaginary 164 part. An instance of this class can only be used for transforms of one 165 particular size. The template parameter is the floating point type that the 166 methods of the class will operate on. 167 168 Example: 169 --- 170 import std.stdio, std.conv, std.exception; 171 import pfft.pfft; 172 173 void main(string[] args) 174 { 175 auto n = to!int(args[1]); 176 enforce((n & (n-1)) == 0, "N must be a power of two."); 177 178 alias Rfft!float F; 179 180 F f; 181 f.initialize(n); 182 183 auto data = F.allocate(n); 184 185 foreach(ref e; data) 186 readf("%s\n", &e); 187 188 f.rfft(data); 189 190 foreach(i; 0 .. n / 2 + 1) 191 writefln("%s\t%s", data[i], (i == 0 || i == n / 2) ? 0 : data[i]); 192 } 193 --- 194 */ 195 struct Rfft(T) 196 { 197 public: 198 nothrow: 199 @nogc: 200 mixin Import!T; 201 202 int log2n; 203 Fft!T _complex; 204 impl.RTable rtable; 205 impl.ITable itable; 206 207 /** 208 The Rfft constructor. The parameter is the size of data sets that $(D rfft) will 209 operate on. I will refer to this number as n in the rest of the documentation 210 for this class. All tables used in rfft are calculated in the constructor. 211 */ 212 void initialize(size_t n) 213 { 214 // Doesn't work with lower size, but I'm unable to understand why 215 assert(n >= 128); 216 217 assert((n & (n - 1)) == 0); 218 log2n = bsf(n); 219 220 _complex.initialize(n / 2); 221 222 auto mem = alignedRealloc(rtable, impl.rtable_size_bytes(log2n), 64); 223 rtable = impl.rfft_table(log2n, mem); 224 assert(mem == rtable); 225 226 mem = alignedRealloc(itable, impl.itable_size_bytes(log2n), 64); 227 itable = impl.interleave_table(log2n, mem); 228 assert(mem == itable); 229 } 230 231 ~this() 232 { 233 alignedFree(rtable, 64); 234 alignedFree(itable, 64); 235 } 236 237 /** 238 Calculates discrete fourier transform of the real valued sequence in data. 239 The method operates in place. When the method completes, data contains the 240 result. First $(I n / 2 + 1) elements contain the real part of the result and 241 the rest contains the imaginary part. Imaginary parts at position 0 and 242 $(I n / 2) are known to be equal to 0 and are not stored, so the content of 243 data looks like this: 244 245 $(D r(0), r(1), ... r(n / 2), i(1), i(2), ... i(n / 2 - 1)) 246 247 248 The elements of the result at position greater than n / 2 can be trivially 249 calculated from the relation $(I DFT(f)[i] = DFT(f)[n - i]*) that holds 250 because the input sequence is real. 251 252 253 The length of the array must be equal to n and the array must be properly 254 aligned. To obtain a properly aligned array you can use $(D allocate). 255 */ 256 void rfft(T[] data) 257 { 258 assert(data.length == (st!1 << log2n)); 259 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 260 261 impl.deinterleave(data.ptr, log2n, itable); 262 impl.rfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable); 263 } 264 265 /** 266 Calculates the inverse of $(D rfft), scaled by n (You can use $(D scale) 267 to normalize the result). Before the method is called, data should contain a 268 complex sequence in the same format as the result of $(D rfft). It is 269 assumed that the input sequence is a discrete fourier transform of a real 270 valued sequence, so the elements of the input sequence not stored in data 271 can be calculated from $(I DFT(f)[i] = DFT(f)[n - i]*). When the method 272 completes, the array contains the real part of the inverse discrete fourier 273 transform. The imaginary part is known to be equal to 0. 274 275 The length of the array must be equal to n and the array must be properly 276 aligned. To obtain a properly aligned array you can use $(D allocate). 277 */ 278 void irfft(T[] data) 279 { 280 assert(data.length == (st!1 << log2n)); 281 assert(((impl.alignment(data.length) - 1) & cast(size_t) data.ptr) == 0); 282 283 impl.irfft(data.ptr, data[$ / 2 .. $].ptr, log2n, _complex.table, rtable); 284 impl.interleave(data.ptr, log2n, itable); 285 } 286 287 /// An alias for Fft!T.allocate 288 alias Fft!(T).allocate allocate; 289 290 /// An alias for Fft!T.deallocate 291 alias Fft!(T).deallocate deallocate; 292 293 /// An alias for Fft!T.scale 294 alias Fft!(T).scale scale; 295 296 /// An alias for Fft!T.alignment 297 alias Fft!(T).alignment alignment; 298 299 @property complex(){ return _complex; } 300 } 301 302 303 private: 304 305 /// Returns: `true` if the pointer is suitably aligned. 306 bool isPointerAligned(void* p, size_t alignment) pure nothrow @nogc 307 { 308 assert(alignment != 0); 309 return ( cast(size_t)p & (alignment - 1) ) == 0; 310 } 311 312 /// Allocates an aligned memory chunk. 313 /// Functionally equivalent to Visual C++ _aligned_malloc. 314 /// Do not mix allocations with different alignment. 315 void* alignedMalloc(size_t size, size_t alignment) nothrow @nogc 316 { 317 assert(alignment != 0); 318 319 // Short-cut and use the C allocator to avoid overhead if no alignment 320 if (alignment == 1) 321 { 322 // C99: 323 // Implementation-defined behavior 324 // Whether the calloc, malloc, and realloc functions return a null pointer 325 // or a pointer to an allocated object when the size requested is zero. 326 void* res = malloc(size); 327 if (size == 0) 328 return null; 329 } 330 331 if (size == 0) 332 return null; 333 334 size_t request = requestedSize(size, alignment); 335 void* raw = malloc(request); 336 337 if (request > 0 && raw == null) // malloc(0) can validly return anything 338 onOutOfMemoryError(); 339 340 return storeRawPointerPlusSizeAndReturnAligned(raw, size, alignment); 341 } 342 343 /// Frees aligned memory allocated by alignedMalloc or alignedRealloc. 344 /// Functionally equivalent to Visual C++ _aligned_free. 345 /// Do not mix allocations with different alignment. 346 void alignedFree(void* aligned, size_t alignment) nothrow @nogc 347 { 348 assert(alignment != 0); 349 350 // Short-cut and use the C allocator to avoid overhead if no alignment 351 if (alignment == 1) 352 return free(aligned); 353 354 // support for free(NULL) 355 if (aligned is null) 356 return; 357 358 void** rawLocation = cast(void**)(cast(char*)aligned - size_t.sizeof); 359 free(*rawLocation); 360 } 361 362 /// Returns: next pointer aligned with alignment bytes. 363 void* nextAlignedPointer(void* start, size_t alignment) pure nothrow @nogc 364 { 365 return cast(void*)nextMultipleOf(cast(size_t)(start), alignment); 366 } 367 368 // Returns number of bytes to actually allocate when asking 369 // for a particular alignement 370 @nogc size_t requestedSize(size_t askedSize, size_t alignment) pure nothrow 371 { 372 enum size_t pointerSize = size_t.sizeof; 373 return askedSize + alignment - 1 + pointerSize * 2; 374 } 375 376 // Store pointer given my malloc, and size in bytes initially requested (alignedRealloc needs it) 377 @nogc void* storeRawPointerPlusSizeAndReturnAligned(void* raw, size_t size, size_t alignment) nothrow 378 { 379 enum size_t pointerSize = size_t.sizeof; 380 char* start = cast(char*)raw + pointerSize * 2; 381 void* aligned = nextAlignedPointer(start, alignment); 382 void** rawLocation = cast(void**)(cast(char*)aligned - pointerSize); 383 *rawLocation = raw; 384 size_t* sizeLocation = cast(size_t*)(cast(char*)aligned - 2 * pointerSize); 385 *sizeLocation = size; 386 return aligned; 387 } 388 389 // Returns: x, multiple of powerOfTwo, so that x >= n. 390 @nogc size_t nextMultipleOf(size_t n, size_t powerOfTwo) pure nothrow 391 { 392 // check power-of-two 393 assert( (powerOfTwo != 0) && ((powerOfTwo & (powerOfTwo - 1)) == 0)); 394 395 size_t mask = ~(powerOfTwo - 1); 396 return (n + powerOfTwo - 1) & mask; 397 } 398 399 /// Reallocates an aligned memory chunk allocated by alignedMalloc or alignedRealloc. 400 /// Functionally equivalent to Visual C++ _aligned_realloc. 401 /// Do not mix allocations with different alignment. 402 @nogc void* alignedRealloc(void* aligned, size_t size, size_t alignment) nothrow 403 { 404 assert(isPointerAligned(aligned, alignment)); 405 406 // If you fail here, it can mean you've used an uninitialized AlignedBuffer. 407 assert(alignment != 0); 408 409 // Short-cut and use the C allocator to avoid overhead if no alignment 410 if (alignment == 1) 411 { 412 void* res = realloc(aligned, size); 413 414 // C99: 415 // Implementation-defined behavior 416 // Whether the calloc, malloc, and realloc functions return a null pointer 417 // or a pointer to an allocated object when the size requested is zero. 418 if (size == 0) 419 return null; 420 421 return res; 422 } 423 424 if (aligned is null) 425 return alignedMalloc(size, alignment); 426 427 if (size == 0) 428 { 429 alignedFree(aligned, alignment); 430 return null; 431 } 432 433 size_t previousSize = *cast(size_t*)(cast(char*)aligned - size_t.sizeof * 2); 434 435 436 void* raw = *cast(void**)(cast(char*)aligned - size_t.sizeof); 437 size_t request = requestedSize(size, alignment); 438 439 // Heuristic: if a requested size is within 50% to 100% of what is already allocated 440 // then exit with the same pointer 441 if ( (previousSize < request * 4) && (request <= previousSize) ) 442 return aligned; 443 444 void* newRaw = malloc(request); 445 static if( __VERSION__ > 2067 ) // onOutOfMemoryError wasn't nothrow before July 2014 446 { 447 if (request > 0 && newRaw == null) // realloc(0) can validly return anything 448 onOutOfMemoryError(); 449 } 450 451 void* newAligned = storeRawPointerPlusSizeAndReturnAligned(newRaw, request, alignment); 452 size_t minSize = size < previousSize ? size : previousSize; 453 memcpy(newAligned, aligned, minSize); // memcpy OK 454 455 // Free previous data 456 alignedFree(aligned, alignment); 457 isPointerAligned(newAligned, alignment); 458 return newAligned; 459 } 460 461 462 unittest 463 { 464 { 465 int n = 16; 466 Fft!float A; 467 A.initialize(n); 468 float[] re = A.allocate(n); 469 float[] im = A.allocate(n); 470 scope(exit) A.deallocate(re); 471 scope(exit) A.deallocate(im); 472 re[] = 1.0f; 473 im[] = 0.0f; 474 A.fft(re, im); 475 A.ifft(re, im); 476 } 477 478 { 479 int n = 128; 480 Rfft!float B; 481 B.initialize(n); 482 float[] data = B.allocate(n); 483 scope(exit) B.deallocate(data); 484 data[] = 1.0f; 485 B.rfft(data); 486 B.irfft(data); 487 } 488 }