1 /// Normal from depth estimation 2 module dplug.gui.ransac; 3 4 import dplug.math.vector; 5 import dplug.core.math; 6 7 import inteli.emmintrin; 8 9 nothrow @nogc: 10 11 // Note: these functions should move to intel-intrinsics #BONUS 12 13 /// 4D dot product 14 package float _mm_dot_ps(__m128 a, __m128 b) pure 15 { 16 __m128 m = a * b; 17 return m.array[0] + m.array[1] + m.array[2] + m.array[3]; 18 } 19 20 // Note: .w element is undefined 21 package __m128 _mm_crossproduct_ps(__m128 a, __m128 b) pure 22 { 23 enum ubyte SHUF1 = _MM_SHUFFLE(3, 0, 2, 1); 24 enum ubyte SHUF2 = _MM_SHUFFLE(3, 1, 0, 2); 25 return _mm_sub_ps( 26 _mm_mul_ps(_mm_shuffle_ps!SHUF1(a, a), _mm_shuffle_ps!SHUF2(b, b)), 27 _mm_mul_ps(_mm_shuffle_ps!SHUF2(a, a), _mm_shuffle_ps!SHUF1(b, b)) 28 ); 29 } 30 31 package __m128 _mm_reflectnormal_ps(__m128 normalA, __m128 normalB) pure // W must be zero 32 { 33 __m128 dotBA = normalB * normalA; 34 float sum = 2 * _mm_dot_ps(normalA, normalB); 35 return normalA - _mm_set1_ps(sum) * normalB; 36 } 37 38 39 package __m128 _mm_fast_normalize_ps(__m128 v) pure 40 { 41 __m128 squared = _mm_mul_ps(v, v); 42 float squaredLength = squared.array[0] + squared.array[1] + squared.array[2] + squared.array[3]; 43 __m128 invRoot = _mm_rsqrt_ss(_mm_set_ss(squaredLength)); 44 invRoot = _mm_shuffle_ps!0(invRoot, invRoot); 45 return _mm_mul_ps(v, invRoot); 46 } 47 48 // Plane-fitting method for finding normals. 49 // This has superseded the previous "RANSAC" method, thanks to being much faster, and also gives better results thanks to being anisotropic. 50 // There could be a tiny bit of visual quality to be extracted by tuning sigma a bit. 51 // Method from http://www.ilikebigbits.com/2015_03_04_plane_from_points.html 52 // Courtesy of Emil Ernerfeldt. 53 vec3f computePlaneFittingNormal( 54 // Must point at 12 floats containing depth of pixels in the neighbourhood, and 3 more for padding. Normal will be computed in this pixel space. 55 // c c c 56 // c c c <--- x is padding, and E is the center pixel depth 57 // c c c 58 // x x x 59 float* depthNeighbourhood) pure // number of inliers 60 { 61 // sigma 0.58 62 // use this page to change the filter: https://observablehq.com/@jobleonard/gaussian-kernel-calculater 63 // Not really possible to go below without increasing digital malaise. 64 65 //static immutable float[3] sigma = [0.1913294239359622, 0.6173411521280758, 0.1913294239359622]; 66 align(16) static immutable float[8] WEIGHTS = 67 [ 68 0.03660694846, 0.118115527008, 0.03660694846, 69 0.118115527008, /* 0.38111009811, */ 0.118115527008, 70 0.03660694846, 0.118115527008, 0.03660694846, 71 ]; 72 73 __m128 mmDepth_0_3 = _mm_loadu_ps(&depthNeighbourhood[0]); 74 __m128 mmDepth_5_8 = _mm_loadu_ps(&depthNeighbourhood[5]); 75 __m128 mmWeights_0_3 = _mm_load_ps(&WEIGHTS[0]); 76 __m128 mmWeights_5_8 = _mm_load_ps(&WEIGHTS[4]); 77 __m128 meanDepth = mmDepth_0_3 * mmWeights_0_3 + mmDepth_5_8 * mmWeights_5_8; 78 float filtDepth = depthNeighbourhood[4] * 0.38111009811f + meanDepth.array[0] + meanDepth.array[1] + meanDepth.array[2] + meanDepth.array[3]; 79 80 // PERF: eventually possible to take filtDepth = depthNeighbourhood[4] directly but at the cost of quality. Difficult tradeoff visually. 81 82 // Compute mean weighted depth 83 __m128 mmFiltDepth = _mm_set1_ps(filtDepth); 84 85 mmDepth_0_3 = mmDepth_0_3 - mmFiltDepth; 86 mmDepth_5_8 = mmDepth_5_8 - mmFiltDepth; 87 88 // We are supposed to compute a full 3x3 covariance matrix, excluding symmetries. 89 // However it simplifies a lot thanks to being a grid with known x and y. 90 // Only xz and yz factors in the matrix need to be computed. 91 92 align(16) static immutable float[8] XZ_WEIGHTS = // those are derived from the above WEIGHTS kernel 93 [ 94 -0.03660694846, 0.0f, 0.03660694846, 95 -0.118115527008, 0.118115527008, 96 -0.03660694846, 0.0f, 0.03660694846, 97 ]; 98 99 align(16) static immutable float[8] YZ_WEIGHTS = // those are derived from the above WEIGHTS kernel 100 [ 101 -0.03660694846, -0.118115527008, -0.03660694846, 102 0.0f, 0.0f, 103 0.03660694846, 0.118115527008, 0.03660694846 104 ]; 105 106 __m128 mmXZ = mmDepth_0_3 * _mm_load_ps(&XZ_WEIGHTS[0]) + mmDepth_5_8 * _mm_load_ps(&XZ_WEIGHTS[4]); 107 __m128 mmYZ = mmDepth_0_3 * _mm_load_ps(&YZ_WEIGHTS[0]) + mmDepth_5_8 * _mm_load_ps(&YZ_WEIGHTS[4]); 108 109 110 float xz = mmXZ.array[0] + mmXZ.array[1] + mmXZ.array[2] + mmXZ.array[3]; 111 float yz = mmYZ.array[0] + mmYZ.array[1] + mmYZ.array[2] + mmYZ.array[3]; 112 113 // Y inversion happens here. 114 __m128 mmNormal = _mm_setr_ps(-xz, 115 yz, 116 0.382658847856f, // this depends on sigma, expected value for xx * yy (4 * WEIGHTS[0] + 2 * WEIGHTS[1]) 117 // Note that we use the normalization step to factor by xx (which is equal to yy) 118 0.0f); 119 mmNormal = _mm_fast_normalize_ps(mmNormal); 120 return vec3f(mmNormal.array[0], mmNormal.array[1], mmNormal.array[2]); 121 }