misc.hpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. // Boost.uBLAS
  2. //
  3. // Copyright (c) 2018 Fady Essam
  4. // Copyright (c) 2018 Stefan Seefeld
  5. //
  6. // Distributed under the Boost Software License, Version 1.0.
  7. // (See accompanying file LICENSE_1_0.txt or
  8. // copy at http://www.boost.org/LICENSE_1_0.txt)
  9. #ifndef boost_numeric_ublas_opencl_misc_hpp_
  10. #define boost_numeric_ublas_opencl_misc_hpp_
  11. #include <boost/numeric/ublas/opencl/library.hpp>
  12. #include <boost/numeric/ublas/opencl/vector.hpp>
  13. #include <boost/numeric/ublas/opencl/matrix.hpp>
  14. namespace boost { namespace numeric { namespace ublas { namespace opencl {
  15. template <typename T>
  16. typename std::enable_if<is_numeric<T>::value, T>::type
  17. a_sum(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue)
  18. {
  19. compute::vector<T> scratch_buffer(v.size(), queue.get_context());
  20. compute::vector<T> result_buffer(1, queue.get_context());
  21. cl_event event;
  22. if (std::is_same<T, float>::value)
  23. clblasSasum(v.size(),
  24. result_buffer.begin().get_buffer().get(), //result buffer
  25. 0, //offset in result buffer
  26. v.begin().get_buffer().get(), //input buffer
  27. 0, //offset in input buffer
  28. 1, //increment in input buffer
  29. scratch_buffer.begin().get_buffer().get(),
  30. 1, //number of command queues
  31. &(queue.get()), //queue
  32. 0, // number of events waiting list
  33. NULL, //event waiting list
  34. &event); //event
  35. else if (std::is_same<T, double>::value)
  36. clblasDasum(v.size(),
  37. result_buffer.begin().get_buffer().get(), //result buffer
  38. 0, //offset in result buffer
  39. v.begin().get_buffer().get(), //input buffer
  40. 0, //offset in input buffer
  41. 1, //increment in input buffer
  42. scratch_buffer.begin().get_buffer().get(),
  43. 1, //number of command queues
  44. &(queue.get()), //queue
  45. 0, // number of events waiting list
  46. NULL, //event waiting list
  47. &event); //event
  48. else if (std::is_same<T, std::complex<float>>::value)
  49. clblasScasum(v.size(),
  50. result_buffer.begin().get_buffer().get(), //result buffer
  51. 0, //offset in result buffer
  52. v.begin().get_buffer().get(), //input buffer
  53. 0, //offset in input buffer
  54. 1, //increment in input buffer
  55. scratch_buffer.begin().get_buffer().get(),
  56. 1, //number of command queues
  57. &(queue.get()), //queue
  58. 0, // number of events waiting list
  59. NULL, //event waiting list
  60. &event); //event
  61. else if (std::is_same<T, std::complex<double>>::value)
  62. clblasDzasum(v.size(),
  63. result_buffer.begin().get_buffer().get(), //result buffer
  64. 0, //offset in result buffer
  65. v.begin().get_buffer().get(), //input buffer
  66. 0, //offset in input buffer
  67. 1, //increment in input buffer
  68. scratch_buffer.begin().get_buffer().get(),
  69. 1, //number of command queues
  70. &(queue.get()), //queue
  71. 0, // number of events waiting list
  72. NULL, //event waiting list
  73. &event); //event
  74. clWaitForEvents(1, &event);
  75. return result_buffer[0];
  76. }
  77. template <typename T, typename A>
  78. typename std::enable_if<is_numeric<T>::value, T>::type
  79. a_sum(ublas::vector<T, A> const &v, compute::command_queue& queue)
  80. {
  81. ublas::vector<T, opencl::storage> vdev(v, queue);
  82. return a_sum(vdev, queue);
  83. }
  84. template <typename T>
  85. typename std::enable_if<std::is_same<T, float>::value |
  86. std::is_same<T, double>::value,
  87. T>::type
  88. norm_1(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue)
  89. {
  90. return a_sum(v, queue);
  91. }
  92. template <typename T, typename A>
  93. typename std::enable_if<std::is_same<T, float>::value |
  94. std::is_same<T, double>::value,
  95. T>::type
  96. norm_1(ublas::vector<T, A> const &v, compute::command_queue& queue)
  97. {
  98. ublas::vector<T, opencl::storage> vdev(v, queue);
  99. return norm_1(vdev, queue);
  100. }
  101. template <typename T>
  102. typename std::enable_if<is_numeric<T>::value, T>::type
  103. norm_2(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue)
  104. {
  105. compute::vector<T> scratch_buffer(2*v.size(), queue.get_context());
  106. compute::vector<T> result_buffer(1, queue.get_context());
  107. cl_event event;
  108. if (std::is_same<T, float>::value)
  109. clblasSnrm2(v.size(),
  110. result_buffer.begin().get_buffer().get(), //result buffer
  111. 0, //offset in result buffer
  112. v.begin().get_buffer().get(), //input buffer
  113. 0, //offset in input buffer
  114. 1, //increment in input buffer
  115. scratch_buffer.begin().get_buffer().get(),
  116. 1, //number of command queues
  117. &(queue.get()), //queue
  118. 0, // number of events waiting list
  119. NULL, //event waiting list
  120. &event); //event
  121. else if (std::is_same<T, double>::value)
  122. clblasDnrm2(v.size(),
  123. result_buffer.begin().get_buffer().get(), //result buffer
  124. 0, //offset in result buffer
  125. v.begin().get_buffer().get(), //input buffer
  126. 0, //offset in input buffer
  127. 1, //increment in input buffer
  128. scratch_buffer.begin().get_buffer().get(),
  129. 1, //number of command queues
  130. &(queue.get()), //queue
  131. 0, // number of events waiting list
  132. NULL, //event waiting list
  133. &event); //event
  134. else if (std::is_same<T, std::complex<float>>::value)
  135. clblasScnrm2(v.size(),
  136. result_buffer.begin().get_buffer().get(), //result buffer
  137. 0, //offset in result buffer
  138. v.begin().get_buffer().get(), //input buffer
  139. 0, //offset in input buffer
  140. 1, //increment in input buffer
  141. scratch_buffer.begin().get_buffer().get(),
  142. 1, //number of command queues
  143. &(queue.get()), //queue
  144. 0, // number of events waiting list
  145. NULL, //event waiting list
  146. &event); //event
  147. else if (std::is_same<T, std::complex<double>>::value)
  148. clblasDznrm2(v.size(),
  149. result_buffer.begin().get_buffer().get(), //result buffer
  150. 0, //offset in result buffer
  151. v.begin().get_buffer().get(), //input buffer
  152. 0, //offset in input buffer
  153. 1, //increment in input buffer
  154. scratch_buffer.begin().get_buffer().get(),
  155. 1, //number of command queues
  156. &(queue.get()), //queue
  157. 0, // number of events waiting list
  158. NULL, //event waiting list
  159. &event); //event
  160. clWaitForEvents(1, &event);
  161. return result_buffer[0];
  162. }
  163. template <typename T, typename A>
  164. typename std::enable_if<is_numeric<T>::value, T>::type
  165. norm_2(ublas::vector<T, A> const &v, compute::command_queue& queue)
  166. {
  167. ublas::vector<T, opencl::storage> vdev(v, queue);
  168. return norm_2(vdev, queue);
  169. }
  170. }}}}
  171. #endif