9
3

func_geometric_simd.inl 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /// @ref core
  2. /// @file glm/detail/func_geometric_simd.inl
  3. #include "../simd/geometric.h"
  4. #if GLM_ARCH & GLM_ARCH_SSE2_BIT
  5. namespace glm{
  6. namespace detail
  7. {
  8. template<qualifier Q>
  9. struct compute_length<4, float, Q, true>
  10. {
  11. GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v)
  12. {
  13. return _mm_cvtss_f32(glm_vec4_length(v.data));
  14. }
  15. };
  16. template<qualifier Q>
  17. struct compute_distance<4, float, Q, true>
  18. {
  19. GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1)
  20. {
  21. return _mm_cvtss_f32(glm_vec4_distance(p0.data, p1.data));
  22. }
  23. };
  24. template<qualifier Q>
  25. struct compute_dot<vec<4, float, Q>, float, true>
  26. {
  27. GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y)
  28. {
  29. return _mm_cvtss_f32(glm_vec1_dot(x.data, y.data));
  30. }
  31. };
  32. template<qualifier Q>
  33. struct compute_cross<float, Q, true>
  34. {
  35. GLM_FUNC_QUALIFIER static vec<3, float, Q> call(vec<3, float, Q> const& a, vec<3, float, Q> const& b)
  36. {
  37. __m128 const set0 = _mm_set_ps(0.0f, a.z, a.y, a.x);
  38. __m128 const set1 = _mm_set_ps(0.0f, b.z, b.y, b.x);
  39. __m128 const xpd0 = glm_vec4_cross(set0, set1);
  40. vec<4, float, Q> Result;
  41. Result.data = xpd0;
  42. return vec<3, float, Q>(Result);
  43. }
  44. };
  45. template<qualifier Q>
  46. struct compute_normalize<4, float, Q, true>
  47. {
  48. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
  49. {
  50. vec<4, float, Q> Result;
  51. Result.data = glm_vec4_normalize(v.data);
  52. return Result;
  53. }
  54. };
  55. template<qualifier Q>
  56. struct compute_faceforward<4, float, Q, true>
  57. {
  58. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& N, vec<4, float, Q> const& I, vec<4, float, Q> const& Nref)
  59. {
  60. vec<4, float, Q> Result;
  61. Result.data = glm_vec4_faceforward(N.data, I.data, Nref.data);
  62. return Result;
  63. }
  64. };
  65. template<qualifier Q>
  66. struct compute_reflect<4, float, Q, true>
  67. {
  68. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& I, vec<4, float, Q> const& N)
  69. {
  70. vec<4, float, Q> Result;
  71. Result.data = glm_vec4_reflect(I.data, N.data);
  72. return Result;
  73. }
  74. };
  75. template<qualifier Q>
  76. struct compute_refract<4, float, Q, true>
  77. {
  78. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& I, vec<4, float, Q> const& N, float eta)
  79. {
  80. vec<4, float, Q> Result;
  81. Result.data = glm_vec4_refract(I.data, N.data, _mm_set1_ps(eta));
  82. return Result;
  83. }
  84. };
  85. }//namespace detail
  86. }//namespace glm
  87. #elif GLM_ARCH & GLM_ARCH_NEON_BIT
  88. namespace glm{
  89. namespace detail
  90. {
  91. template<qualifier Q>
  92. struct compute_length<4, float, Q, true>
  93. {
  94. GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v)
  95. {
  96. return sqrt(compute_dot<vec<4, float, Q>, float, true>::call(v, v));
  97. }
  98. };
  99. template<qualifier Q>
  100. struct compute_distance<4, float, Q, true>
  101. {
  102. GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1)
  103. {
  104. return compute_length<4, float, Q, true>::call(p1 - p0);
  105. }
  106. };
  107. template<qualifier Q>
  108. struct compute_dot<vec<4, float, Q>, float, true>
  109. {
  110. GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y)
  111. {
  112. #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
  113. float32x4_t v = vmulq_f32(x.data, y.data);
  114. return vaddvq_f32(v);
  115. #else // Armv7a with Neon
  116. float32x4_t p = vmulq_f32(x.data, y.data);
  117. float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
  118. v = vpadd_f32(v, v);
  119. return vget_lane_f32(v, 0);
  120. #endif
  121. }
  122. };
  123. template<qualifier Q>
  124. struct compute_normalize<4, float, Q, true>
  125. {
  126. GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
  127. {
  128. float32x4_t p = vmulq_f32(v.data, v.data);
  129. #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
  130. p = vpaddq_f32(p, p);
  131. p = vpaddq_f32(p, p);
  132. #else
  133. float32x2_t t = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
  134. t = vpadd_f32(t, t);
  135. p = vcombine_f32(t, t);
  136. #endif
  137. float32x4_t vd = vrsqrteq_f32(p);
  138. vec<4, float, Q> Result;
  139. Result.data = vmulq_f32(v.data, vd);
  140. return Result;
  141. }
  142. };
  143. }//namespace detail
  144. }//namespace glm
  145. #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT