// Copyright Jernej Krempuš 2012
// Copyright Guillaume Piolat 2016-2023
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
module dplug.fft.sse_double;

import inteli.emmintrin;
import dplug.fft.fft_impl;

/// SSE2 vector backend for double-precision FFTs.
///
/// Wraps the `double2` type from intel-intrinsics behind the static
/// interface expected by the generic FFT code in `dplug.fft.fft_impl`
/// (element type, vector width, interleave/transpose primitives, and
/// bit-reversal kernels).
struct Vector
{
nothrow:
@nogc:
    alias double2 vec;   // one SSE register = 2 doubles
    alias double T;      // scalar element type

    enum vec_size = 2;                    // elements per vector
    enum log2_bitreverse_chunk_size = 2;  // bit_reverse* operate on 4x4 (2^2 x 2^2) chunks

    /// Broadcast scalar `a` into both lanes.
    static vec scalar_to_vector(T a)
    {
        return _mm_set1_pd(a);
    }

    /// Interleave lanes of two vectors:
    /// r0 = {a0[0], a1[0]}, r1 = {a0[1], a1[1]}.
    /// For 2-element vectors this operation is its own inverse,
    /// hence the `deinterleave` alias below.
    static void interleave(vec a0, vec a1, ref vec r0, ref vec r1)
    {
        r0 = _mm_unpacklo_pd(a0, a1);
        r1 = _mm_unpackhi_pd(a0, a1);
    }

    /// Load a vector from a possibly-unaligned address.
    static vec unaligned_load(T* p)
    {
        return _mm_loadu_pd(p);
    }

    /// Store a vector to a possibly-unaligned address.
    static void unaligned_store(T* p, vec v)
    {
        _mm_storeu_pd(p, v);
    }

    /// Swap the two lanes: {v[0], v[1]} -> {v[1], v[0]}
    /// (shuffle immediate 1 picks v[1] then v[0]).
    static vec reverse(vec v)
    {
        return _mm_shuffle_pd!1(v, v);
    }

    // View a scalar pointer as a vector pointer (no alignment check here;
    // callers are expected to pass suitably aligned data).
    private static vec * v(T * a)
    {
        return cast(vec*)a;
    }

    /// Split two consecutive complex numbers stored as {re, im} pairs at
    /// `arr` into a vector of real parts (`rr`) and imaginary parts (`ri`).
    /// `len` is unused here since vec_size == 2 covers exactly one pair
    /// per vector.
    static void complex_array_to_real_imag_vec(int len)(
        T * arr, ref vec rr, ref vec ri)
    {
        interleave(v(arr)[0], v(arr)[1], rr, ri);
    }

    // interleave is self-inverse for 2-lane vectors, so deinterleave
    // is literally the same operation.
    alias interleave deinterleave;

    /// Transpose a 2x2 matrix of scalars stored in two vectors.
    /// Only elements_per_vector == 2 is meaningful for this backend.
    static void transpose(int elements_per_vector)(
        vec a0, vec a1, ref vec r0, ref vec r1)
    {
        static if(elements_per_vector == 2)
            interleave(a0, a1, r0, r1);
        else
            static assert(0);
    }

    /// Swap two 4x4 chunks of doubles while bit-reversing element positions.
    ///
    /// Rows of each chunk start at p + m*0 .. p + m*3 (m = row stride in
    /// doubles); each row is 4 doubles = 2 vectors. Element (i, j) of the
    /// chunk at p0 ends up at position (bitrev2(i), bitrev2(j)) of the
    /// chunk at p1, and vice versa — e.g. the first store group moves
    /// interleaved rows 0 and 2 of p0 into p1. Used by the FFT's
    /// bit-reversal reordering pass.
    static void bit_reverse_swap(T * p0, T * p1, size_t m)
    {
        vec a0, a1, a2, a3, b0, b1, b2, b3;

        // Diagonal quadrant (cols 0..1 of rows 0/2): transpose and swap.
        a0 = v(p0 + m * 0)[0];
        a1 = v(p0 + m * 2)[0];
        b0 = v(p1 + m * 0)[0];
        b1 = v(p1 + m * 2)[0];
        interleave(a0, a1, a0, a1);
        interleave(b0, b1, b0, b1);
        v(p1 + m * 0)[0] = a0;
        v(p1 + m * 2)[0] = a1;
        v(p0 + m * 0)[0] = b0;
        v(p0 + m * 2)[0] = b1;

        // Other diagonal quadrant (cols 2..3 of rows 1/3).
        a2 = v(p0 + m * 1)[1];
        a3 = v(p0 + m * 3)[1];
        b2 = v(p1 + m * 1)[1];
        b3 = v(p1 + m * 3)[1];
        interleave(a2, a3, a2, a3);
        interleave(b2, b3, b2, b3);
        v(p1 + m * 1)[1] = a2;
        v(p1 + m * 3)[1] = a3;
        v(p0 + m * 1)[1] = b2;
        v(p0 + m * 3)[1] = b3;

        // Off-diagonal quadrants: cols 2..3 of rows 0/2 trade places with
        // cols 0..1 of rows 1/3 (note the crossed [0]/[1] indices on store).
        a0 = v(p0 + m * 0)[1];
        a1 = v(p0 + m * 2)[1];
        a2 = v(p0 + m * 1)[0];
        a3 = v(p0 + m * 3)[0];
        interleave(a0, a1, a0, a1);
        interleave(a2, a3, a2, a3);
        b0 = v(p1 + m * 0)[1];
        b1 = v(p1 + m * 2)[1];
        b2 = v(p1 + m * 1)[0];
        b3 = v(p1 + m * 3)[0];
        v(p1 + m * 0)[1] = a2;
        v(p1 + m * 2)[1] = a3;
        v(p1 + m * 1)[0] = a0;
        v(p1 + m * 3)[0] = a1;
        interleave(b0, b1, b0, b1);
        interleave(b2, b3, b2, b3);
        v(p0 + m * 0)[1] = b2;
        v(p0 + m * 2)[1] = b3;
        v(p0 + m * 1)[0] = b0;
        v(p0 + m * 3)[0] = b1;
    }

    /// In-place bit-reversal of one 4x4 chunk of doubles at `p`
    /// (rows at p + m*0 .. p + m*3): element (i, j) moves to
    /// (bitrev2(i), bitrev2(j)). Same shuffle pattern as
    /// bit_reverse_swap, but within a single chunk.
    static void bit_reverse(T * p, size_t m)
    {
        // Diagonal quadrants: a 2x2 transpose in each is enough.
        vec a0, a1, a2, a3;
        a0 = v(p + m * 0)[0];
        a1 = v(p + m * 2)[0];
        a2 = v(p + m * 1)[1];
        a3 = v(p + m * 3)[1];
        interleave(a0, a1, a0, a1);
        interleave(a2, a3, a2, a3);
        v(p + m * 0)[0] = a0;
        v(p + m * 2)[0] = a1;
        v(p + m * 1)[1] = a2;
        v(p + m * 3)[1] = a3;

        // Off-diagonal quadrants: transpose each 2x2 and exchange them
        // (stores go to the opposite quadrant's slots).
        a0 = v(p + m * 0)[1];
        a1 = v(p + m * 2)[1];
        a2 = v(p + m * 1)[0];
        a3 = v(p + m * 3)[0];
        interleave(a0, a1, a0, a1);
        interleave(a2, a3, a2, a3);
        v(p + m * 0)[1] = a2;
        v(p + m * 2)[1] = a3;
        v(p + m * 1)[0] = a0;
        v(p + m * 3)[0] = a1;
    }
}

/// Tuning parameters consumed by the generic FFT implementation.
/// NOTE(review): the exact semantics of each knob live in
/// dplug.fft.fft_impl — names suggest chunk sizes (log2), the size
/// threshold for the "large" code path, and preferred buffer alignment
/// (4 KiB); confirm against fft_impl before changing values.
struct Options
{
    enum log2_bitreverse_large_chunk_size = 5;
    enum large_limit = 13;
    enum log2_optimal_n = 10;
    enum passes_per_recursive_call = 4;
    enum log2_recursive_passes_chunk_size = 5;
    enum prefered_alignment = 4 * (1 << 10);
    enum { fast_init }
}