123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- /// @ref simd_neon
- /// @file glm/simd/neon.h
- #pragma once
- #if GLM_ARCH & GLM_ARCH_NEON_BIT
- #include <arm_neon.h>
- namespace glm {
- namespace neon {
- static float32x4_t dupq_lane(float32x4_t vsrc, int lane) {
- switch(lane) {
- #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
- case 0: return vdupq_laneq_f32(vsrc, 0);
- case 1: return vdupq_laneq_f32(vsrc, 1);
- case 2: return vdupq_laneq_f32(vsrc, 2);
- case 3: return vdupq_laneq_f32(vsrc, 3);
- #else
- case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0));
- case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1));
- case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2));
- case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3));
- #endif
- }
- assert(!"Unreachable code executed!");
- return vdupq_n_f32(0.0f);
- }
- static float32x2_t dup_lane(float32x4_t vsrc, int lane) {
- switch(lane) {
- #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
- case 0: return vdup_laneq_f32(vsrc, 0);
- case 1: return vdup_laneq_f32(vsrc, 1);
- case 2: return vdup_laneq_f32(vsrc, 2);
- case 3: return vdup_laneq_f32(vsrc, 3);
- #else
- case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 0));
- case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1));
- case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2));
- case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3));
- #endif
- }
- assert(!"Unreachable code executed!");
- return vdup_n_f32(0.0f);
- }
- static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) {
- #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
- switch(dlane) {
- case 0:
- switch(slane) {
- case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0);
- case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1);
- case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2);
- case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3);
- }
- assert(!"Unreachable code executed!");
- case 1:
- switch(slane) {
- case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0);
- case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1);
- case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2);
- case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3);
- }
- assert(!"Unreachable code executed!");
- case 2:
- switch(slane) {
- case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0);
- case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1);
- case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2);
- case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3);
- }
- assert(!"Unreachable code executed!");
- case 3:
- switch(slane) {
- case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0);
- case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1);
- case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2);
- case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3);
- }
- assert(!"Unreachable code executed!");
- }
- #else
- float l;
- switch(slane) {
- case 0: l = vgetq_lane_f32(vsrc, 0); break;
- case 1: l = vgetq_lane_f32(vsrc, 1); break;
- case 2: l = vgetq_lane_f32(vsrc, 2); break;
- case 3: l = vgetq_lane_f32(vsrc, 3); break;
- default:
- assert(!"Unreachable code executed!");
- }
- switch(dlane) {
- case 0: return vsetq_lane_f32(l, vdst, 0);
- case 1: return vsetq_lane_f32(l, vdst, 1);
- case 2: return vsetq_lane_f32(l, vdst, 2);
- case 3: return vsetq_lane_f32(l, vdst, 3);
- }
- #endif
- assert(!"Unreachable code executed!");
- return vdupq_n_f32(0.0f);
- }
- static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) {
- #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
- switch(lane) {
- case 0: return vmulq_laneq_f32(v, vlane, 0); break;
- case 1: return vmulq_laneq_f32(v, vlane, 1); break;
- case 2: return vmulq_laneq_f32(v, vlane, 2); break;
- case 3: return vmulq_laneq_f32(v, vlane, 3); break;
- default:
- assert(!"Unreachable code executed!");
- }
- assert(!"Unreachable code executed!");
- return vdupq_n_f32(0.0f);
- #else
- return vmulq_f32(v, dupq_lane(vlane, lane));
- #endif
- }
- static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) {
- #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
- #ifdef GLM_CONFIG_FORCE_FMA
- # define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0)
- #else
- # define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0)
- #endif
- switch(lane) {
- case 0:
- FMADD_LANE(acc, v, vlane, 0);
- return acc;
- case 1:
- FMADD_LANE(acc, v, vlane, 1);
- return acc;
- case 2:
- FMADD_LANE(acc, v, vlane, 2);
- return acc;
- case 3:
- FMADD_LANE(acc, v, vlane, 3);
- return acc;
- default:
- assert(!"Unreachable code executed!");
- }
- assert(!"Unreachable code executed!");
- return vdupq_n_f32(0.0f);
- # undef FMADD_LANE
- #else
- return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane)));
- #endif
- }
- } //namespace neon
- } // namespace glm
- #endif // GLM_ARCH & GLM_ARCH_NEON_BIT
|