transpose.hpp 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. // Boost.uBLAS
  2. //
  3. // Copyright (c) 2018 Fady Essam
  4. // Copyright (c) 2018 Stefan Seefeld
  5. //
  6. // Distributed under the Boost Software License, Version 1.0.
  7. // (See accompanying file LICENSE_1_0.txt or
  8. // copy at http://www.boost.org/LICENSE_1_0.txt)
  9. #ifndef boost_numeric_ublas_opencl_transpose_hpp_
  10. #define boost_numeric_ublas_opencl_transpose_hpp_
  11. #include <boost/numeric/ublas/opencl/library.hpp>
  12. #include <boost/numeric/ublas/opencl/vector.hpp>
  13. #include <boost/numeric/ublas/opencl/matrix.hpp>
  // OpenCL C source of the out-of-place transposition kernel, instantiated for
  // one element type. ELEMENT_TYPE is stringified into the source, so it must be
  // spelled as an OpenCL C type name (e.g. float, double, float2, double2).
  //
  // Each work-item (column_index, row_index) that lies inside the width x height
  // grid copies in[column_index + width * row_index] to
  // result[row_index + height * column_index].
  #define OPENCL_TRANSPOSITION_KERNEL(ELEMENT_TYPE) \
    /* kernel signature */ \
    "__kernel void transpose(" \
    "__global " #ELEMENT_TYPE "* in, " \
    "__global " #ELEMENT_TYPE "* result, " \
    "unsigned int width, " \
    "unsigned int height) \n" \
    "{ \n" \
    /* work-item coordinates */ \
    " unsigned int column_index = get_global_id(0); \n" \
    " unsigned int row_index = get_global_id(1); \n" \
    /* bounds check, then the swapped-index copy */ \
    " if (column_index < width && row_index < height) \n" \
    " { \n" \
    " unsigned int index_in = column_index + width * row_index; \n" \
    " unsigned int index_result = row_index + height * column_index; \n" \
    " result[index_result] = in[index_in]; \n" \
    " } \n" \
    "} \n"
  27. namespace boost { namespace numeric { namespace ublas { namespace opencl {
  28. template<class T, class L1, class L2>
  29. typename std::enable_if<is_numeric<T>::value>::type
  30. change_layout(ublas::matrix<T, L1, opencl::storage> const &m,
  31. ublas::matrix<T, L2, opencl::storage> &result,
  32. compute::command_queue& queue)
  33. {
  34. assert(m.size1() == result.size1() && m.size2() == result.size2());
  35. assert(m.device() == result.device() && m.device() == queue.get_device());
  36. assert(!(std::is_same<L1, L2>::value));
  37. char const *kernel;
  38. if (std::is_same<T, float>::value)
  39. kernel = OPENCL_TRANSPOSITION_KERNEL(float);
  40. else if (std::is_same<T, double>::value)
  41. kernel = OPENCL_TRANSPOSITION_KERNEL(double);
  42. else if (std::is_same<T, std::complex<float>>::value)
  43. kernel = OPENCL_TRANSPOSITION_KERNEL(float2);
  44. else if (std::is_same<T, std::complex<double>>::value)
  45. kernel = OPENCL_TRANSPOSITION_KERNEL(double2);
  46. size_t len = strlen(kernel);
  47. cl_int err;
  48. cl_context c_context = queue.get_context().get();
  49. cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err);
  50. clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL);
  51. cl_kernel c_kernel = clCreateKernel(program, "transpose", &err);
  52. size_t width = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size2() : m.size1();
  53. size_t height = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size1() : m.size2();
  54. size_t global_size[2] = { width , height };
  55. clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get());
  56. clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get());
  57. clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width);
  58. clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height);
  59. cl_command_queue c_queue = queue.get();
  60. cl_event event = NULL;
  61. clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event);
  62. clWaitForEvents(1, &event);
  63. }
  64. template<class T, class L1, class L2, class A>
  65. typename std::enable_if<is_numeric<T>::value>::type
  66. change_layout(ublas::matrix<T, L1, A> const &m,
  67. ublas::matrix<T, L2, A> &result,
  68. compute::command_queue& queue)
  69. {
  70. ublas::matrix<T, L1, opencl::storage> mdev(m, queue);
  71. ublas::matrix<T, L2, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
  72. change_layout(mdev, rdev, queue);
  73. rdev.to_host(result, queue);
  74. }
  75. template<class T, class L>
  76. typename std::enable_if<is_numeric<T>::value>::type
  77. trans(ublas::matrix<T, L, opencl::storage> const &m,
  78. ublas::matrix<T, L, opencl::storage> &result,
  79. compute::command_queue& queue)
  80. {
  81. assert(m.size1() == result.size2() && m.size2() == result.size1());
  82. assert(m.device() == result.device() && m.device() == queue.get_device());
  83. char const *kernel;
  84. if (std::is_same<T, float>::value)
  85. kernel = OPENCL_TRANSPOSITION_KERNEL(float);
  86. else if (std::is_same<T, double>::value)
  87. kernel = OPENCL_TRANSPOSITION_KERNEL(double);
  88. else if (std::is_same<T, std::complex<float>>::value)
  89. kernel = OPENCL_TRANSPOSITION_KERNEL(float2);
  90. else if (std::is_same<T, std::complex<double>>::value)
  91. kernel = OPENCL_TRANSPOSITION_KERNEL(double2);
  92. size_t len = strlen(kernel);
  93. cl_int err;
  94. cl_context c_context = queue.get_context().get();
  95. cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err);
  96. clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL);
  97. cl_kernel c_kernel = clCreateKernel(program, "transpose", &err);
  98. size_t width = std::is_same <L, ublas::basic_row_major<>>::value ? m.size2() : m.size1();
  99. size_t height = std::is_same <L, ublas::basic_row_major<>>::value ? m.size1() : m.size2();
  100. size_t global_size[2] = { width , height };
  101. clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get());
  102. clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get());
  103. clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width);
  104. clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height);
  105. cl_command_queue c_queue = queue.get();
  106. cl_event event = NULL;
  107. clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event);
  108. clWaitForEvents(1, &event);
  109. }
  110. template<class T, class L, class A>
  111. typename std::enable_if<is_numeric<T>::value>::type
  112. trans(ublas::matrix<T, L, A> const &m,
  113. ublas::matrix<T, L, A> &result,
  114. compute::command_queue& queue)
  115. {
  116. ublas::matrix<T, L, opencl::storage> mdev(m, queue);
  117. ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
  118. trans(mdev, rdev, queue);
  119. rdev.to_host(result, queue);
  120. }
  121. template<class T, class L, class A>
  122. typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L, A>>::type
  123. trans(ublas::matrix<T, L, A>& m, compute::command_queue& queue)
  124. {
  125. ublas::matrix<T, L, A> result(m.size2(), m.size1());
  126. trans(m, result, queue);
  127. return result;
  128. }
  129. }}}}
  130. #endif