1 /// Normal from depth estimation
2 module dplug.gui.ransac;
3 
4 import dplug.math.vector;
5 import dplug.core.math;
6 
7 import inteli.emmintrin;
8 
9 nothrow @nogc:
10 
11 // Note: these functions should move to intel-intrinsics #BONUS
12 
13 /// 4D dot product
14 package float _mm_dot_ps(__m128 a, __m128 b) pure
15 {
16     __m128 m = a * b;
17     return m.array[0] + m.array[1] + m.array[2] + m.array[3];
18 }
19 
20 // Note: .w element is undefined
21 package __m128 _mm_crossproduct_ps(__m128 a, __m128 b) pure
22 {
23     enum ubyte SHUF1 = _MM_SHUFFLE(3, 0, 2, 1);
24     enum ubyte SHUF2 = _MM_SHUFFLE(3, 1, 0, 2);
25     return _mm_sub_ps(
26                       _mm_mul_ps(_mm_shuffle_ps!SHUF1(a, a), _mm_shuffle_ps!SHUF2(b, b)), 
27                       _mm_mul_ps(_mm_shuffle_ps!SHUF2(a, a), _mm_shuffle_ps!SHUF1(b, b))
28                       );
29 }
30 
31 package __m128 _mm_reflectnormal_ps(__m128 normalA, __m128 normalB) pure // W must be zero
32 {
33     __m128 dotBA = normalB * normalA;
34     float sum = 2 * _mm_dot_ps(normalA, normalB);
35     return normalA - _mm_set1_ps(sum) * normalB;
36 }
37 
38 
39 package __m128 _mm_fast_normalize_ps(__m128 v) pure
40 {
41     __m128 squared = _mm_mul_ps(v, v);
42     float squaredLength = squared.array[0] + squared.array[1] + squared.array[2] + squared.array[3];
43     __m128 invRoot = _mm_rsqrt_ss(_mm_set_ss(squaredLength));
44     invRoot = _mm_shuffle_ps!0(invRoot, invRoot);
45     return _mm_mul_ps(v, invRoot);
46 }
47 
48 // Plane-fitting method for finding normals.
49 // This has superseded the previous "RANSAC" method, thanks to being much faster, and also gives better results thanks to being anisotropic.
50 // There could be a tiny bit of visual quality to be extracted by tuning sigma a bit. 
51 // Method from http://www.ilikebigbits.com/2015_03_04_plane_from_points.html
52 // Courtesy of Emil Ernerfeldt.
53 vec3f computePlaneFittingNormal(
54     // Must point at 12 floats containing depth of pixels in the neighbourhood, and 3 more for padding. Normal will be computed in this pixel space.
55     // c c c
56     // c c c     <--- x is padding, and E is the center pixel depth
57     // c c c 
58     // x x x
59     float* depthNeighbourhood) pure // number of inliers
60 {
61     // sigma 0.58
62     // use this page to change the filter: https://observablehq.com/@jobleonard/gaussian-kernel-calculater
63     // Not really possible to go below without increasing digital malaise.
64 
65     //static immutable float[3] sigma = [0.1913294239359622, 0.6173411521280758, 0.1913294239359622];
66     align(16) static immutable float[8] WEIGHTS =
67     [
68         0.03660694846, 0.118115527008, 0.03660694846,
69         0.118115527008, /* 0.38111009811, */ 0.118115527008,
70         0.03660694846, 0.118115527008, 0.03660694846,
71     ];
72 
73     __m128 mmDepth_0_3 = _mm_loadu_ps(&depthNeighbourhood[0]);
74     __m128 mmDepth_5_8 = _mm_loadu_ps(&depthNeighbourhood[5]);
75     __m128 mmWeights_0_3 = _mm_load_ps(&WEIGHTS[0]);
76     __m128 mmWeights_5_8 = _mm_load_ps(&WEIGHTS[4]);
77     __m128 meanDepth = mmDepth_0_3 * mmWeights_0_3 + mmDepth_5_8 * mmWeights_5_8;
78     float filtDepth = depthNeighbourhood[4] * 0.38111009811f + meanDepth.array[0] + meanDepth.array[1] + meanDepth.array[2] + meanDepth.array[3];
79 
80     // PERF: eventually possible to take filtDepth = depthNeighbourhood[4] directly but at the cost of quality. Difficult tradeoff visually.
81 
82     // Compute mean weighted depth
83     __m128 mmFiltDepth = _mm_set1_ps(filtDepth);
84 
85     mmDepth_0_3 = mmDepth_0_3 - mmFiltDepth;
86     mmDepth_5_8 = mmDepth_5_8 - mmFiltDepth;
87     
88     // We are supposed to compute a full 3x3 covariance matrix, excluding symmetries.
89     // However it simplifies a lot thanks to being a grid with known x and y.
90     // Only xz and yz factors in the matrix need to be computed.
91 
92     align(16) static immutable float[8] XZ_WEIGHTS = // those are derived from the above WEIGHTS kernel
93     [
94         -0.03660694846,         0.0f, 0.03660694846,
95         -0.118115527008,              0.118115527008,
96         -0.03660694846,         0.0f, 0.03660694846,
97     ];
98 
99     align(16) static immutable float[8] YZ_WEIGHTS = // those are derived from the above WEIGHTS kernel
100     [
101         -0.03660694846, -0.118115527008, -0.03660694846,
102                   0.0f,                            0.0f,
103          0.03660694846,  0.118115527008,  0.03660694846
104     ];
105 
106     __m128 mmXZ = mmDepth_0_3 * _mm_load_ps(&XZ_WEIGHTS[0]) + mmDepth_5_8 * _mm_load_ps(&XZ_WEIGHTS[4]);
107     __m128 mmYZ = mmDepth_0_3 * _mm_load_ps(&YZ_WEIGHTS[0]) + mmDepth_5_8 * _mm_load_ps(&YZ_WEIGHTS[4]);
108 
109 
110     float xz = mmXZ.array[0] + mmXZ.array[1] + mmXZ.array[2] + mmXZ.array[3];
111     float yz = mmYZ.array[0] + mmYZ.array[1] + mmYZ.array[2] + mmYZ.array[3];
112 
113     // Y inversion happens here.
114     __m128 mmNormal = _mm_setr_ps(-xz, 
115                                    yz, 
116                                    0.382658847856f, // this depends on sigma, expected value for xx * yy (4 * WEIGHTS[0] + 2 * WEIGHTS[1])
117                                                     // Note that we use the normalization step to factor by xx (which is equal to yy)
118                                    0.0f);
119     mmNormal = _mm_fast_normalize_ps(mmNormal);
120     return vec3f(mmNormal.array[0], mmNormal.array[1], mmNormal.array[2]);
121 }