1 /**
2 * High-level interfaces for providing FFT analysis, real FFT, and resynthesis from grains.
3 *
4 * Copyright: Copyright Auburn Sounds 2015 and later.
5 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 * Authors:   Guillaume Piolat
7 */
8 module dplug.dsp.fft;
9 
10 import core.stdc..string;
11 
12 import std.math;
13 
14 import dplug.dsp.window;
15 import dplug.core.math;
16 import dplug.core.complex;
17 import dplug.core.vec;
18 import pfft.pfft;
19 
20 
21 enum FFTDirection
22 {
23     FORWARD = 0,
24     REVERSE = 1
25 }
26 
27 /// Perform in-place FFT.
28 /// Equivalent to `std.numeric.fft`, but this one is nothrow @nogc.
29 void forwardFFT(T)(BuiltinComplex!T[] buffer) nothrow @nogc
30 {
31     FFT_internal!(T, FFTDirection.FORWARD)(buffer);
32 }
33 
34 /// Perform in-place inverse FFT.
35 /// Equivalent to `std.numeric.inverseFft`, but this one is nothrow @nogc.
36 void inverseFFT(T)(BuiltinComplex!T[] buffer) nothrow @nogc
37 {
38     FFT_internal!(T, FFTDirection.REVERSE)(buffer);
39 }
40 
41 // PERF: use pfft instead would be much faster
42 private void FFT_internal(T, FFTDirection direction)(BuiltinComplex!T[] buffer) pure nothrow @nogc
43 {
44     int size = cast(int)(buffer.length);
45     assert(isPowerOfTwo(size));
46     int m = iFloorLog2(size);
47 
48     BuiltinComplex!T* pbuffer = buffer.ptr;
49 
50     // do the bit reversal
51     int i2 = cast(int)size / 2;
52     int j = 0;
53     for (int i = 0; i < size - 1; ++i)
54     {
55         if (i < j)
56         {
57             auto tmp = pbuffer[i];
58             pbuffer[i] = pbuffer[j];
59             pbuffer[j] = tmp;
60         }
61 
62         int k = i2;
63         while(k <= j)
64         {
65             j = j - k;
66             k = k / 2;
67         }
68         j += k;
69     }
70 
71     // compute the FFT
72     BuiltinComplex!T c = -1+0i;
73     int l2 = 1;
74     for (int l = 0; l < m; ++l)
75     {
76         int l1 = l2;
77         l2 = l2 * 2;
78         BuiltinComplex!T u = 1+0i;
79         for (int j2 = 0; j2 < l1; ++j2)
80         {
81             int i = j2;
82             while (i < size)
83             {
84                 int i1 = i + l1;
85                 BuiltinComplex!T t1 = u * pbuffer[i1];
86                 pbuffer[i1] = pbuffer[i] - t1;
87                 pbuffer[i] += t1;
88                 i += l2;
89             }
90             u = u * c;
91         }
92 
93         T newImag = sqrt((1 - c.re) / 2);
94         static if (direction == FFTDirection.FORWARD)
95             newImag = -newImag;
96         T newReal = sqrt((1 + c.re) / 2);
97         c = newReal + 1.0fi * newImag;
98     }
99 
100     // scaling when doing the reverse transformation, to avoid being multiplied by size
101     static if (direction == FFTDirection.REVERSE)
102     {
103         T divider = 1 / cast(T)size;
104         for (int i = 0; i < size; ++i)
105         {
106             pbuffer[i] = pbuffer[i] * divider;
107         }
108     }
109 }
110 
111 
112 // should operate the same as Phobos FFT
113 unittest
114 {
115     import std.complex;
116     import std.numeric: approxEqual, fft;
117 
118     bool approxEqualArrBuiltin(BuiltinComplex!double[] a, BuiltinComplex!double[] b) pure
119     {
120         foreach(i; 0..a.length)
121         {
122             if (!approxEqual(a[i].re, b[i].re))
123                 return false;
124             if (!approxEqual(a[i].im, b[i].im))
125                 return false;
126         }
127         return true;
128     }
129 
130     bool approxEqualArr(BuiltinComplex!double[] a, Complex!double[] b) pure
131     {
132         foreach(i; 0..a.length)
133         {
134             if (!approxEqual(a[i].re, b[i].re))
135                 return false;
136             if (!approxEqual(a[i].im, b[i].im))
137                 return false;
138         }
139         return true;
140     }
141 
142     BuiltinComplex!double[] A = [1+0i, 13-4i, 5-5i, 0+2i];
143     Complex!double[] Abis = [Complex!double(1, 0), Complex!double(13, -4), Complex!double(5,-5), Complex!double(0,2)];
144     Complex!double[] fftARef = fft(Abis);
145 
146     auto B = A.dup;
147     forwardFFT!double(B);
148     assert(approxEqualArr(B, fftARef));
149     inverseFFT!double(B);
150     assert(approxEqualArrBuiltin(B, A));
151 }
152 
153 /// From a signal, output chunks of determined size, with optional overlap.
154 /// Introduces approximately windowSize/2 samples delay.
155 struct Segmenter(T)
156 {
157 nothrow:
158 @nogc:
159 
160     int segmentSize() pure const 
161     {
162         return _segmentSize;
163     }
164 
165     int analysisPeriod() pure const
166     {
167         return _analysisPeriod;
168     }
169 
170     /// To call at initialization and whenever samplerate changes.
171     /// segmentSize = size of sound segments, expressed in samples.
172     /// analysisPeriod = period of analysis results, allow to be more precise frequentially, expressed in samples.
173     void initialize(int segmentSize, int analysisPeriod)
174     {
175         assert(analysisPeriod <= segmentSize); // no support for zero overlap
176 
177         // 1-sized FFT support
178         if (analysisPeriod == 0)
179             analysisPeriod = 1;
180 
181         _segmentSize = segmentSize;
182         _analysisPeriod = analysisPeriod;
183 
184         // clear input delay
185         _buffer.reallocBuffer(_segmentSize);
186         _buffer[] = 0;
187         _index = 0;
188     }
189 
190     ~this()
191     {
192         _buffer.reallocBuffer(0);
193     }
194 
195     @disable this(this);
196 
197     // Push one sample, eventually call the delegate to process a segment.
198     bool feed(T x, scope void delegate(T[] segment) nothrow @nogc processSegment = null)
199     {
200         _buffer[_index] = x;
201         _index = _index + 1;
202         if (_index >= _segmentSize)
203         {
204             // process segment (optional)
205             if (processSegment !is null)
206                 processSegment(_buffer[0.._segmentSize]);
207 
208             // rotate buffer
209             {
210                 int samplesToDrop = _analysisPeriod;
211                 assert(0 < samplesToDrop && samplesToDrop <= _segmentSize);
212                 int remainingSamples = _segmentSize - samplesToDrop;
213 
214                 // FUTURE: use ring buffer instead of copy?
215                 memmove(_buffer.ptr, _buffer.ptr + samplesToDrop, T.sizeof * remainingSamples);
216                 _index = remainingSamples;
217 
218             }
219             return true;
220         }
221         else
222             return false;
223     }
224 
225     /// Returns: Internal buffer.
226     T[] buffer()
227     {
228         return _buffer;
229     }
230 
231 private:
232     T[] _buffer;
233     int _segmentSize;     // in samples
234     int _analysisPeriod; // in samples
235     int _index;
236 }
237 
238 
239 /// From short term windowed data, output the summed signal.
240 /// Segments can be irregular and have different size.
241 struct ShortTermReconstruction
242 {
243 nothrow:
244 @nogc:
245     /// maxSimultSegments is the maximum number of simulatneously summed samples.
246     /// maxSegmentLength in samples
247     void initialize(int maxSimultSegments, int maxSegmentLength)
248     {
249         _maxSegmentLength = maxSegmentLength;
250         _maxSimultSegments = maxSimultSegments;
251         _desc.reallocBuffer(maxSimultSegments);
252         for (int i = 0; i < _maxSimultSegments; ++i)
253         {
254             _desc[i].playOffset = 0; // initially inactive
255             _desc[i].length = 0;
256             _desc[i].buffer = null;
257             _desc[i].buffer.reallocBuffer(maxSegmentLength);
258             //reallocBuffer(_desc[i].buffer, maxSegmentLength);
259         } //)
260     }
261 
262     ~this()
263     {
264         if (_desc !is null)
265             for (int i = 0; i < _maxSimultSegments; ++i)
266                 _desc[i].buffer.reallocBuffer(0);
267         _desc.reallocBuffer(0);
268     }
269 
270     @disable this(this);
271 
272     // Copy segment to a free slot, and start its summing.
273     // The first sample of this segment will be played at next() call if delay is 0.
274     void startSegment(float[] newSegment, int delay = 0)
275     {
276         assert(newSegment.length <= _maxSegmentLength);
277         int i = allocSegmentSlot();
278         int len = cast(int)(newSegment.length);
279         _desc[i].playOffset = -delay;
280         _desc[i].length = len;
281         _desc[i].buffer[0..len] = newSegment[]; // copy segment
282     }
283 
284     // Same, but with the input being split into two slices A ~ B. This is a common case
285     // when summing zero-phase windows in STFT analysis.
286     void startSegmentSplitted(float[] segmentA, float[] segmentB, int delay = 0)
287     {
288         int i = allocSegmentSlot();
289         int lenA = cast(int)(segmentA.length);
290         int lenB = cast(int)(segmentB.length);
291         assert(lenA + lenB <= _maxSegmentLength);
292 
293         _desc[i].playOffset = -delay;
294         _desc[i].length = lenA + lenB;
295         _desc[i].buffer[0..lenA] = segmentA[];         // copy segment part A
296         _desc[i].buffer[lenA..lenA+lenB] = segmentB[]; // copy segment part B
297     }
298 
299     float nextSample()
300     {
301         float sum = 0;
302         foreach(ref desc; _desc)
303         {
304             if (desc.playOffset < desc.length)
305             {
306                 if (desc.playOffset >= 0)
307                     sum += desc.buffer[desc.playOffset];
308                 desc.playOffset += 1;
309             }
310         }
311         return sum;
312     }
313 
314     void nextBuffer(float* outAudio, int frames)
315     {
316         outAudio[0..frames] = 0;
317 
318         // Add each pending segment
319         foreach(ref desc; _desc)
320         {
321             const int offset = desc.playOffset;
322             const int len = desc.length;
323             if (offset < len)
324             {
325                 // Compute relative time event for the segment
326                 int startOfSegment = -offset;
327                 int endOfSegment = startOfSegment + len;
328 
329                 // Compute the area in 0..frames we can playback the segment
330                 int startOfSumming = startOfSegment;
331                 if (startOfSumming < 0)
332                     startOfSumming = 0;
333                 if (startOfSumming >= frames)
334                     startOfSumming = frames;
335                 int endOfSumming = endOfSegment;
336                 if (endOfSumming >= frames)
337                     endOfSumming = frames;
338 
339                 int count = endOfSumming - startOfSumming;
340                 assert(count >= 0);
341                 
342                 const(float)* segmentData = desc.buffer.ptr + offset;
343 
344                 // PERF: this can be optimized further
345                 for (int i = startOfSumming; i < endOfSumming; ++i) 
346                 {
347                     outAudio[i] += segmentData[i];
348                 }
349                 desc.playOffset = offset + frames;
350             }
351             // else disabled segment
352         }
353     }
354 
355 private:
356 
357     struct SegmentDesc
358     {
359         int playOffset; // offset in this segment
360         int length; // length in this segment
361         float[] buffer; // 0..length => data for this segment
362 
363         bool active() pure const nothrow @nogc
364         {
365             return playOffset < length;
366         }
367     }
368     int _maxSimultSegments;
369     int _maxSegmentLength;
370     SegmentDesc[] _desc;
371 
372     int allocSegmentSlot()
373     {
374         for (int i = 0; i < _maxSimultSegments; ++i)
375             if (!_desc[i].active())
376                 return i;
377         assert(false); // maxSimultSegments too small, or usage error
378     }
379 }
380 
381 version = useRealFFT;
382 
383 /// From a signal, output short term FFT data.
384 /// Variable overlap.
385 /// Introduces approximately windowSize/2 samples delay.
386 /// Uses a real FFT to gain some speed.
387 struct FFTAnalyzer(T)
388 {
389 public:
390 
391     /// To call at initialization and whenever samplerate changes.
392     /// windowSize = size of window, expressed in samples
393     /// fftSize = size of FFT. Must be power-of-two and >= windowSize. Missing samples are zero-padded in time domain.
394     /// analysisPeriod = period of analysis results, allow to be more precise frequentially, expressed in samples.
395     /// Basic overlap is achieved with windowSize = 2 * analysisPeriod
396     /// if zeroPhaseWindowing = true, "zero phase" windowing is used
397     /// (center of window is at first sample, zero-padding happen at center)
398     void initialize(int windowSize, int fftSize, int analysisPeriod, WindowDesc windowDesc, bool zeroPhaseWindowing) nothrow @nogc
399     {
400         assert(isPowerOfTwo(fftSize));
401         assert(fftSize >= windowSize);
402 
403         _zeroPhaseWindowing = zeroPhaseWindowing;
404 
405         _fftSize = fftSize;
406 
407         _window.initialize(windowDesc, windowSize);
408         _windowSize = windowSize;
409 
410         // account for window shape
411         _scaleFactor = fftSize / _window.sumOfWindowSamples();
412 
413         // account for overlap
414         _scaleFactor *= cast(float)(analysisPeriod) / windowSize;
415 
416         _segmenter.initialize(windowSize, analysisPeriod);
417 
418         version(useRealFFT)
419         {
420             _timeData.reallocBuffer(fftSize);
421             _rfft.initialize(fftSize);
422         }
423     }
424 
425     ~this()
426     {
427         version(useRealFFT)
428         {
429             _timeData.reallocBuffer(0);
430         }
431     }
432 
433     version(useRealFFT)
434     {
435         /// Gets the RFFT object which allows to perform efficient incverse FFT with the same pre-computed tables.
436         ref RFFT!T realFFT()
437         {
438             return _rfft;
439         }
440     }
441 
442     bool feed(float x, BuiltinComplex!T[] fftData) nothrow @nogc
443     {
444         void processSegment(T[] segment) nothrow @nogc
445         {
446             int windowSize = _windowSize;
447             assert(segment.length == _windowSize);
448 
449             T scaleFactor = _scaleFactor;
450 
451             version(useRealFFT)
452             {
453                 if (_zeroPhaseWindowing)
454                 {
455                     // "Zero Phase" windowing
456                     // Through clever reordering, phase of ouput coefficients will relate to the
457                     // center of the window
458                     //_
459                     // \_                   _/
460                     //   \                 /
461                     //    \               /
462                     //     \_____________/____
463                     int center = (_windowSize - 1) / 2; // position of center bin
464                     int nLeft = _windowSize - center;
465                     for (int i = 0; i < nLeft; ++i)
466                         _timeData[i] = (segment[center + i] * _window[center + i] * scaleFactor);
467 
468                     int nPadding = _fftSize - _windowSize;
469                     for (int i = 0; i < nPadding; ++i)
470                         _timeData[nLeft + i] = 0;
471 
472                     for (int i = 0; i < center; ++i)
473                         _timeData[nLeft + nPadding + i] = (segment[i] * _window[i] * scaleFactor);
474                 }
475                 else
476                 {
477                     // "Normal" windowing
478                     // Phase of output coefficient will relate to the start of the buffer
479                     //      _
480                     //    _/ \_
481                     //   /     \
482                     //  /       \
483                     //_/         \____________
484 
485                     // fill FFT buffer and multiply by window
486                     for (int i = 0; i < _windowSize; ++i)
487                         _timeData[i] = (segment[i] * _window[i] * scaleFactor);
488 
489                     // zero-padding
490                     for (int i = _windowSize; i < _fftSize; ++i)
491                         _timeData[i] = 0;
492                 }
493             }
494             else
495             {
496 
497                 if (_zeroPhaseWindowing)
498                 {
499                     // "Zero Phase" windowing
500                     // Through clever reordering, phase of ouput coefficients will relate to the
501                     // center of the window
502                     //_
503                     // \_                   _/
504                     //   \                 /
505                     //    \               /
506                     //     \_____________/____
507                     int center = (_windowSize - 1) / 2; // position of center bin
508                     int nLeft = _windowSize - center;
509                     for (int i = 0; i < nLeft; ++i)
510                         fftData[i] = (segment[center + i] * _window[center + i] * scaleFactor)+0i;
511 
512                     int nPadding = _fftSize - _windowSize;
513                     for (int i = 0; i < nPadding; ++i)
514                         fftData[nLeft + i] = 0+0i;
515 
516                     for (int i = 0; i < center; ++i)
517                         fftData[nLeft + nPadding + i] = (segment[i] * _window[i] * scaleFactor)+0i;
518                 }
519                 else
520                 {
521                     // "Normal" windowing
522                     // Phase of output coefficient will relate to the start of the buffer
523                     //      _
524                     //    _/ \_
525                     //   /     \
526                     //  /       \
527                     //_/         \____________
528 
529                     // fill FFT buffer and multiply by window
530                     for (int i = 0; i < _windowSize; ++i)
531                         fftData[i] = (segment[i] * _window[i] * scaleFactor)+0i;
532 
533                     // zero-padding
534                     for (int i = _windowSize; i < _fftSize; ++i)
535                         fftData[i] = 0+0i;
536                 }
537             }
538 
539             // perform forward FFT on this slice
540             version(useRealFFT)
541                 _rfft.forwardTransform(_timeData[], fftData[0.._fftSize/2+1]);
542             else
543                 forwardFFT!T(fftData[0.._fftSize]);
544         }
545 
546         return _segmenter.feed(x, &processSegment);
547     }
548 
549 private:
550     Segmenter!T _segmenter;
551     bool _zeroPhaseWindowing;
552     int _fftSize;        // in samples
553 
554     Window!T _window;
555     int _windowSize;     // in samples
556 
557     T _scaleFactor; // account to the shape of the windowing function
558 
559     version(useRealFFT)
560     {
561         RFFT!T _rfft;
562         T[] _timeData;
563     }
564 }
565 
566 unittest
567 {
568     FFTAnalyzer!float a;
569     a.initialize(1024, 2048, 512, WindowDesc(WindowType.hann, WindowAlignment.left), true);
570 
571     FFTAnalyzer!double b;
572     b.initialize(1024, 2048, 512, WindowDesc(WindowType.hann, WindowAlignment.right), false);
573 }
574 
575 
576 /// Converts a normalized frequency to a FFT bin.
577 /// Params:
578 ///     normalizedFrequency = Frequency in cycles per sample.
579 ///     fftSize = Size of FFT.
580 /// Returns: Corresponding fractional bin.
581 float convertNormalizedFrequencyToFFTBin(float normalizedFrequency, int fftSize) nothrow @nogc
582 {
583     return (normalizedFrequency * fftSize);
584 }
585 
586 /// Converts a frequency to a FFT bin.
587 /// Returns: Corresponding fractional bin.
588 float convertFrequencyToFFTBin(float frequencyHz, float samplingRate, int fftSize) nothrow @nogc
589 {
590     return (frequencyHz * fftSize) / samplingRate;
591 }
592 
593 /// Converts a frequency to a FFT bin.
594 /// Returns: Corresponding fractional bin.
595 float convertFrequencyToFFTBinInv(float frequencyHz, float invSamplingRate, int fftSize) nothrow @nogc
596 {
597     return (frequencyHz * fftSize) * invSamplingRate;
598 }
599 
600 /// Converts a FFT bin to a frequency.
601 /// Returns: Corresponding center frequency.
602 float convertFFTBinToFrequency(float fftBin, int fftSize, float samplingRate) nothrow @nogc
603 {
604     return (samplingRate * fftBin) / fftSize;
605 }
606 
607 /// Converts a FFT bin to a frequency.
608 /// Returns: Corresponding center frequency.
609 float convertFFTBinToFrequencyInv(float fftBin, float invFFTSize, float samplingRate) nothrow @nogc
610 {
611     return (samplingRate * fftBin) * invFFTSize;
612 }
613 
614 /// Converts a FFT bin to a normalized frequency.
615 /// Params:
616 ///     fftBin = Bin index in the FFT.
617 ///     fftSize = Size of FFT.
618 /// Returns: Corresponding normalized frequency
619 float convertFFTBinToNormalizedFrequency(float fftBin, int fftSize) nothrow @nogc
620 {
621     return fftBin / fftSize;
622 }
623 
624 
625 /// Converts a FFT bin to a normalized frequency.
626 /// Params:
627 ///     fftBin = Bin index of the FFT.
628 ///     invFFTSize = Inverse size of FFT.
629 /// Returns: Corresponding normalized frequency.
630 float convertFFTBinToNormalizedFrequencyInv(float fftBin, float invFFTSize) nothrow @nogc
631 {
632     return fftBin * invFFTSize;
633 }
634 
635 /// Perform a FFT from a real signal, saves up CPU.
636 struct RFFT(T)
637 {
638 public:
639 nothrow:
640 @nogc:
641 
642     void initialize(int length)
643     {
644         _length = length;
645         _internal.initialize(length);  
646         _alignment = cast(int)_internal.alignment(length);
647 
648         _buffer.reallocBuffer(length, _alignment);
649     }
650 
651     ~this()
652     {
653         if (_buffer != null)
654             _buffer.reallocBuffer(0, _alignment);
655     }
656 
657     @disable this(this);
658  
659     void forwardTransform(const(T)[] timeData, BuiltinComplex!T[] outputBins)
660     {
661         _buffer[] = timeData[];
662    
663         // Perform real FFT
664         _internal.rfft(_buffer);
665 
666         //_buffer[]  =0;
667         // At this point, f contains:
668         //    f destination array (frequency bins)
669         //    f[0...length(x)/2] = real values,
670         //    f[length(x)/2+1...length(x)-1] = imaginary values of coefficents 1...length(x)/2-1.
671         // So we have to reshuffle them to have nice complex bins.
672         int mid = _length/2;
673         outputBins[0] = _buffer[0] + 0i;
674         for(int i = 1; i < mid; ++i)
675             outputBins[i] = _buffer[i] + 1i * _buffer[mid+i];
676         outputBins[mid] = _buffer[mid] + 0i; // for length 1, this still works
677     }
678 
679     /**
680     * Compute the inverse FFT of the array. Perform post-scaling.
681     *
682     * Params:
683     *    inputBins = Source arrays (N/2 + 1 frequency bins).    
684     *    timeData = Destination array (N time samples).
685     *
686     * Note: 
687     *    This transform has the benefit you don't have to conjugate the "mirrored" part of the FFT.
688     *    Excess data in imaginary part of DC and Nyquist bins are ignored.
689     */
690     void reverseTransform(BuiltinComplex!T[] inputBins, T[] timeData) 
691     {
692         // On inverse transform, scale down result
693         T invMultiplier = cast(T)1 / _length;
694 
695         // Shuffle input frequency bins, and scale down.
696         int mid = _length/2;
697         for(int i = 0; i <= mid; ++i)
698             _buffer[i] = inputBins[i].re * invMultiplier;
699         for(int i = mid+1; i < _length; ++i)
700             _buffer[i] = inputBins[i-mid].im * invMultiplier;
701 
702         // At this point, the format in f is:
703         //          f [0...length(x)/2] = real values
704         //          f [length(x)/2+1...length(x)-1] = negative imaginary values of coefficents 1...length(x)/2-1.
705         // Which is suitable for the RealFFT algorithm.
706         _internal.irfft(_buffer);
707 
708         // Perf: use scaling from pfft
709         timeData[] = _buffer[];
710     }
711 
712 private:
713     // Required alignment for RFFT buffers.
714     int _alignment;
715 
716     // pfft object
717     Rfft!T _internal;
718 
719     // length of FFT
720     int _length;
721 
722     // temporary buffer since pfft is in-place
723     T[] _buffer;
724 }