Clutter Engine 0.0.1
Loading...
Searching...
No Matches
neon.h
Go to the documentation of this file.
1
3
4#pragma once
5
6#if GLM_ARCH & GLM_ARCH_NEON_BIT
7#include <arm_neon.h>
8
9namespace glm {
10 namespace neon {
11 static float32x4_t dupq_lane(float32x4_t vsrc, int lane) {
12 switch(lane) {
13#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
14 case 0: return vdupq_laneq_f32(vsrc, 0);
15 case 1: return vdupq_laneq_f32(vsrc, 1);
16 case 2: return vdupq_laneq_f32(vsrc, 2);
17 case 3: return vdupq_laneq_f32(vsrc, 3);
18#else
19 case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0));
20 case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1));
21 case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2));
22 case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3));
23#endif
24 }
25 assert(!"Unreachable code executed!");
26 return vdupq_n_f32(0.0f);
27 }
28
29 static float32x2_t dup_lane(float32x4_t vsrc, int lane) {
30 switch(lane) {
31#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
32 case 0: return vdup_laneq_f32(vsrc, 0);
33 case 1: return vdup_laneq_f32(vsrc, 1);
34 case 2: return vdup_laneq_f32(vsrc, 2);
35 case 3: return vdup_laneq_f32(vsrc, 3);
36#else
37 case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 0));
38 case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1));
39 case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2));
40 case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3));
41#endif
42 }
43 assert(!"Unreachable code executed!");
44 return vdup_n_f32(0.0f);
45 }
46
47 static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) {
48#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
49 switch(dlane) {
50 case 0:
51 switch(slane) {
52 case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0);
53 case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1);
54 case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2);
55 case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3);
56 }
57 assert(!"Unreachable code executed!");
58 case 1:
59 switch(slane) {
60 case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0);
61 case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1);
62 case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2);
63 case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3);
64 }
65 assert(!"Unreachable code executed!");
66 case 2:
67 switch(slane) {
68 case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0);
69 case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1);
70 case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2);
71 case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3);
72 }
73 assert(!"Unreachable code executed!");
74 case 3:
75 switch(slane) {
76 case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0);
77 case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1);
78 case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2);
79 case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3);
80 }
81 assert(!"Unreachable code executed!");
82 }
83#else
84
85 float l;
86 switch(slane) {
87 case 0: l = vgetq_lane_f32(vsrc, 0); break;
88 case 1: l = vgetq_lane_f32(vsrc, 1); break;
89 case 2: l = vgetq_lane_f32(vsrc, 2); break;
90 case 3: l = vgetq_lane_f32(vsrc, 3); break;
91 default:
92 assert(!"Unreachable code executed!");
93 }
94 switch(dlane) {
95 case 0: return vsetq_lane_f32(l, vdst, 0);
96 case 1: return vsetq_lane_f32(l, vdst, 1);
97 case 2: return vsetq_lane_f32(l, vdst, 2);
98 case 3: return vsetq_lane_f32(l, vdst, 3);
99 }
100#endif
101 assert(!"Unreachable code executed!");
102 return vdupq_n_f32(0.0f);
103 }
104
105 static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) {
106#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
107 switch(lane) {
108 case 0: return vmulq_laneq_f32(v, vlane, 0); break;
109 case 1: return vmulq_laneq_f32(v, vlane, 1); break;
110 case 2: return vmulq_laneq_f32(v, vlane, 2); break;
111 case 3: return vmulq_laneq_f32(v, vlane, 3); break;
112 default:
113 assert(!"Unreachable code executed!");
114 }
115 assert(!"Unreachable code executed!");
116 return vdupq_n_f32(0.0f);
117#else
118 return vmulq_f32(v, dupq_lane(vlane, lane));
119#endif
120 }
121
122 static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) {
123#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
124#ifdef GLM_CONFIG_FORCE_FMA
125# define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0)
126#else
127# define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0)
128#endif
129
130 switch(lane) {
131 case 0:
132 FMADD_LANE(acc, v, vlane, 0);
133 return acc;
134 case 1:
135 FMADD_LANE(acc, v, vlane, 1);
136 return acc;
137 case 2:
138 FMADD_LANE(acc, v, vlane, 2);
139 return acc;
140 case 3:
141 FMADD_LANE(acc, v, vlane, 3);
142 return acc;
143 default:
144 assert(!"Unreachable code executed!");
145 }
146 assert(!"Unreachable code executed!");
147 return vdupq_n_f32(0.0f);
148# undef FMADD_LANE
149#else
150 return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane)));
151#endif
152 }
153 } //namespace neon
154} // namespace glm
155#endif // GLM_ARCH & GLM_ARCH_NEON_BIT
Core features
Definition common.hpp:21