elementwise.hpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. // Boost.uBLAS
  2. //
  3. // Copyright (c) 2018 Fady Essam
  4. // Copyright (c) 2018 Stefan Seefeld
  5. //
  6. // Distributed under the Boost Software License, Version 1.0.
  7. // (See accompanying file LICENSE_1_0.txt or
  8. // copy at http://www.boost.org/LICENSE_1_0.txt)
  9. #ifndef boost_numeric_ublas_opencl_elementwise_hpp_
  10. #define boost_numeric_ublas_opencl_elementwise_hpp_
  11. #include <boost/numeric/ublas/opencl/library.hpp>
  12. #include <boost/numeric/ublas/opencl/vector.hpp>
  13. #include <boost/numeric/ublas/opencl/matrix.hpp>
  14. namespace boost { namespace numeric { namespace ublas { namespace opencl {
  15. namespace compute = boost::compute;
  16. namespace lambda = boost::compute::lambda;
  17. template <typename T, typename L1, typename L2, typename L3, class O>
  18. void element_wise(ublas::matrix<T, L1, opencl::storage> const &a,
  19. ublas::matrix<T, L2, opencl::storage> const &b,
  20. ublas::matrix<T, L3, opencl::storage> &result,
  21. O op, compute::command_queue& queue)
  22. {
  23. assert(a.device() == b.device() &&
  24. a.device() == result.device() &&
  25. a.device() == queue.get_device());
  26. assert(a.size1() == b.size1() && a.size2() == b.size2());
  27. compute::transform(a.begin(),
  28. a.end(),
  29. b.begin(),
  30. result.begin(),
  31. op,
  32. queue);
  33. queue.finish();
  34. }
  35. template <typename T, typename L1, typename L2, typename L3, typename A, class O>
  36. void element_wise(ublas::matrix<T, L1, A> const &a,
  37. ublas::matrix<T, L2, A> const &b,
  38. ublas::matrix<T, L3, A> &result,
  39. O op,
  40. compute::command_queue &queue)
  41. {
  42. ublas::matrix<T, L1, opencl::storage> adev(a, queue);
  43. ublas::matrix<T, L2, opencl::storage> bdev(b, queue);
  44. ublas::matrix<T, L3, opencl::storage> rdev(a.size1(), b.size2(), queue.get_context());
  45. element_wise(adev, bdev, rdev, op, queue);
  46. rdev.to_host(result, queue);
  47. }
  48. template <typename T, typename L1, typename L2, typename A, typename O>
  49. ublas::matrix<T, L1, A> element_wise(ublas::matrix<T, L1, A> const &a,
  50. ublas::matrix<T, L2, A> const &b,
  51. O op,
  52. compute::command_queue &queue)
  53. {
  54. ublas::matrix<T, L1, A> result(a.size1(), b.size2());
  55. element_wise(a, b, result, op, queue);
  56. return result;
  57. }
  58. template <typename T, typename O>
  59. void element_wise(ublas::vector<T, opencl::storage> const &a,
  60. ublas::vector<T, opencl::storage> const &b,
  61. ublas::vector<T, opencl::storage> &result,
  62. O op,
  63. compute::command_queue& queue)
  64. {
  65. assert(a.device() == b.device() &&
  66. a.device() == result.device() &&
  67. a.device() == queue.get_device());
  68. assert(a.size() == b.size());
  69. compute::transform(a.begin(),
  70. a.end(),
  71. b.begin(),
  72. result.begin(),
  73. op,
  74. queue);
  75. queue.finish();
  76. }
  77. template <typename T, typename A, typename O>
  78. void element_wise(ublas::vector<T, A> const &a,
  79. ublas::vector<T, A> const &b,
  80. ublas::vector<T, A>& result,
  81. O op,
  82. compute::command_queue &queue)
  83. {
  84. ublas::vector<T, opencl::storage> adev(a, queue);
  85. ublas::vector<T, opencl::storage> bdev(b, queue);
  86. ublas::vector<T, opencl::storage> rdev(a.size(), queue.get_context());
  87. element_wise(adev, bdev, rdev, op, queue);
  88. rdev.to_host(result, queue);
  89. }
  90. template <typename T, typename A, typename O>
  91. ublas::vector<T, A> element_wise(ublas::vector<T, A> const &a,
  92. ublas::vector<T, A> const &b,
  93. O op,
  94. compute::command_queue &queue)
  95. {
  96. ublas::vector<T, A> result(a.size());
  97. element_wise(a, b, result, op, queue);
  98. return result;
  99. }
  100. template <typename T, typename L1, typename L2, typename L3>
  101. void element_add(ublas::matrix<T, L1, opencl::storage> const &a,
  102. ublas::matrix<T, L2, opencl::storage> const &b,
  103. ublas::matrix<T, L3, opencl::storage> &result,
  104. compute::command_queue &queue)
  105. {
  106. element_wise(a, b, result, compute::plus<T>(), queue);
  107. }
  108. template <typename T, typename L1, typename L2, typename L3, typename A>
  109. void element_add(ublas::matrix<T, L1, A> const &a,
  110. ublas::matrix<T, L2, A> const &b,
  111. ublas::matrix<T, L3, A> &result,
  112. compute::command_queue &queue)
  113. {
  114. element_wise(a, b, result, compute::plus<T>(), queue);
  115. }
  116. template <typename T, typename L1, typename L2, typename A>
  117. ublas::matrix<T, L1, A> element_add(ublas::matrix<T, L1, A> const &a,
  118. ublas::matrix<T, L2, A> const &b,
  119. compute::command_queue &queue)
  120. {
  121. return element_wise(a, b, compute::plus<T>(), queue);
  122. }
  123. template <typename T>
  124. void element_add(ublas::vector<T, opencl::storage> const &a,
  125. ublas::vector<T, opencl::storage> const &b,
  126. ublas::vector<T, opencl::storage> &result,
  127. compute::command_queue& queue)
  128. {
  129. element_wise(a, b, result, compute::plus<T>(), queue);
  130. }
  131. template <typename T, typename A>
  132. void element_add(ublas::vector<T, A> const &a,
  133. ublas::vector<T, A> const &b,
  134. ublas::vector<T, A> &result,
  135. compute::command_queue &queue)
  136. {
  137. element_wise(a, b, result, compute::plus<T>(), queue);
  138. }
  139. template <typename T, typename A>
  140. ublas::vector<T, A> element_add(ublas::vector<T, A> const &a,
  141. ublas::vector<T, A> const &b,
  142. compute::command_queue &queue)
  143. {
  144. return element_wise(a, b, compute::plus<T>(), queue);
  145. }
  146. template<typename T, typename L>
  147. void element_add(ublas::matrix<T, L, opencl::storage> const &m, T value,
  148. ublas::matrix<T, L, opencl::storage> &result,
  149. compute::command_queue& queue)
  150. {
  151. assert(m.device() == result.device() && m.device() == queue.get_device());
  152. assert(m.size1() == result.size1() && m.size2() == result.size2());
  153. compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 + value, queue);
  154. queue.finish();
  155. }
  156. template<typename T, typename L, typename A>
  157. void element_add(ublas::matrix<T, L, A> const &m, T value,
  158. ublas::matrix<T, L, A> &result,
  159. compute::command_queue& queue)
  160. {
  161. ublas::matrix<T, L, opencl::storage> mdev(m, queue);
  162. ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
  163. element_add(mdev, value, rdev, queue);
  164. rdev.to_host(result, queue);
  165. }
  166. template<typename T, typename L, typename A>
  167. ublas::matrix<T, L, A> element_add(ublas::matrix<T, L, A> const &m, T value,
  168. compute::command_queue& queue)
  169. {
  170. ublas::matrix<T, L, A> result(m.size1(), m.size2());
  171. element_add(m, value, result, queue);
  172. return result;
  173. }
  174. template<typename T>
  175. void element_add(ublas::vector<T, opencl::storage> const &v, T value,
  176. ublas::vector<T, opencl::storage> &result,
  177. compute::command_queue& queue)
  178. {
  179. assert(v.device() == result.device() && v.device() == queue.get_device());
  180. assert(v.size() == result.size());
  181. compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 + value, queue);
  182. queue.finish();
  183. }
  184. template<typename T, typename A>
  185. void element_add(ublas::vector<T, A> const &v, T value,
  186. ublas::vector<T, A> &result,
  187. compute::command_queue& queue)
  188. {
  189. ublas::vector<T, opencl::storage> vdev(v, queue);
  190. ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
  191. element_add(vdev, value, rdev, queue);
  192. rdev.to_host(result, queue);
  193. }
  194. template <typename T, typename A>
  195. ublas::vector<T, A> element_add(ublas::vector<T, A> const &v, T value,
  196. compute::command_queue& queue)
  197. {
  198. ublas::vector<T, A> result(v.size());
  199. element_add(v, value, result, queue);
  200. return result;
  201. }
  202. template <typename T, typename L1, typename L2, typename L3>
  203. void element_sub(ublas::matrix<T, L1, opencl::storage> const &a,
  204. ublas::matrix<T, L2, opencl::storage> const &b,
  205. ublas::matrix<T, L3, opencl::storage> &result,
  206. compute::command_queue& queue)
  207. {
  208. element_wise(a, b, compute::minus<T>(), result, queue);
  209. }
  210. template <typename T, typename L1, typename L2, typename L3, typename A>
  211. void element_sub(ublas::matrix<T, L1, A> const &a,
  212. ublas::matrix<T, L2, A> const &b,
  213. ublas::matrix<T, L3, A> &result,
  214. compute::command_queue &queue)
  215. {
  216. element_wise(a, b, result, compute::minus<T>(), queue);
  217. }
  218. template <typename T, typename L1, typename L2, typename A>
  219. ublas::matrix<T, L1, A> element_sub(ublas::matrix<T, L1, A> const &a,
  220. ublas::matrix<T, L2, A> const &b,
  221. compute::command_queue &queue)
  222. {
  223. return element_wise(a, b, compute::minus<T>(), queue);
  224. }
  225. template <typename T>
  226. void element_sub(ublas::vector<T, opencl::storage> const &a,
  227. ublas::vector<T, opencl::storage> const &b,
  228. ublas::vector<T, opencl::storage> &result,
  229. compute::command_queue& queue)
  230. {
  231. element_wise(a, b, result, compute::minus<T>(), queue);
  232. }
  233. template <typename T, typename A>
  234. void element_sub(ublas::vector<T, A> const &a,
  235. ublas::vector<T, A> const &b,
  236. ublas::vector<T, A> &result,
  237. compute::command_queue &queue)
  238. {
  239. element_wise(a, b, result, compute::minus<T>(), queue);
  240. }
  241. template <typename T, typename A>
  242. ublas::vector<T, A> element_sub(ublas::vector<T, A> const &a,
  243. ublas::vector<T, A> const &b,
  244. compute::command_queue &queue)
  245. {
  246. return element_wise(a, b, compute::minus<T>(), queue);
  247. }
  248. template <typename T, typename L>
  249. void element_sub(ublas::matrix<T, L, opencl::storage> const &m, T value,
  250. ublas::matrix<T, L, opencl::storage> &result,
  251. compute::command_queue& queue)
  252. {
  253. assert(m.device() == result.device() && m.device() == queue.get_device());
  254. assert(m.size1() == result.size1() && m.size2() == result.size2());
  255. compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 - value, queue);
  256. queue.finish();
  257. }
  258. template <typename T, typename L, typename A>
  259. void element_sub(ublas::matrix<T, L, A> const &m, T value,
  260. ublas::matrix<T, L, A> &result,
  261. compute::command_queue& queue)
  262. {
  263. ublas::matrix<T, L, opencl::storage> mdev(m, queue);
  264. ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
  265. element_sub(mdev, value, rdev, queue);
  266. rdev.to_host(result, queue);
  267. }
  268. template <typename T, typename L, typename A>
  269. ublas::matrix<T, L, A> element_sub(ublas::matrix<T, L, A> const &m, T value,
  270. compute::command_queue& queue)
  271. {
  272. ublas::matrix<T, L, A> result(m.size1(), m.size2());
  273. element_sub(m, value, result, queue);
  274. return result;
  275. }
  276. template <typename T>
  277. void element_sub(ublas::vector<T, opencl::storage> const &v, T value,
  278. ublas::vector<T, opencl::storage> &result,
  279. compute::command_queue& queue)
  280. {
  281. assert(v.device() == result.device() && v.device() == queue.get_device());
  282. assert(v.size() == result.size());
  283. compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 - value, queue);
  284. queue.finish();
  285. }
  286. template <typename T, typename A>
  287. void element_sub(ublas::vector<T, A> const &v, T value,
  288. ublas::vector<T, A> &result,
  289. compute::command_queue& queue)
  290. {
  291. ublas::vector<T, opencl::storage> vdev(v, queue);
  292. ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
  293. element_sub(vdev, value, rdev, queue);
  294. rdev.to_host(result, queue);
  295. }
  296. template <typename T, typename A>
  297. ublas::vector<T, A> element_sub(ublas::vector<T, A> const &v, T value,
  298. compute::command_queue& queue)
  299. {
  300. ublas::vector<T, A> result(v.size());
  301. element_sub(v, value, result, queue);
  302. return result;
  303. }
  304. template <typename T, typename L1, typename L2, typename L3>
  305. void element_prod(ublas::matrix<T, L1, opencl::storage> const &a,
  306. ublas::matrix<T, L2, opencl::storage> const &b,
  307. ublas::matrix<T, L3, opencl::storage> &result,
  308. compute::command_queue& queue)
  309. {
  310. element_wise(a, b, result, compute::multiplies<T>(), queue);
  311. }
  312. template <typename T, typename L1, typename L2, typename L3, typename A>
  313. void element_prod(ublas::matrix<T, L1, A> const &a,
  314. ublas::matrix<T, L2, A> const &b,
  315. ublas::matrix<T, L3, A> &result,
  316. compute::command_queue &queue)
  317. {
  318. element_wise(a, b, result, compute::multiplies<T>(), queue);
  319. }
  320. template <typename T, typename L1, typename L2, typename A>
  321. ublas::matrix<T, L1, A> element_prod(ublas::matrix<T, L1, A> const &a,
  322. ublas::matrix<T, L2, A> const &b,
  323. compute::command_queue &queue)
  324. {
  325. return element_wise(a, b, compute::multiplies<T>(), queue);
  326. }
  327. template <typename T>
  328. void element_prod(ublas::vector<T, opencl::storage> const &a,
  329. ublas::vector<T, opencl::storage> const &b,
  330. ublas::vector<T, opencl::storage> &result,
  331. compute::command_queue& queue)
  332. {
  333. element_wise(a, b, result, compute::multiplies<T>(), queue);
  334. }
  335. template <typename T, typename A>
  336. void element_prod(ublas::vector<T, A> const &a,
  337. ublas::vector<T, A> const &b,
  338. ublas::vector<T, A> &result,
  339. compute::command_queue &queue)
  340. {
  341. element_wise(a, b, result, compute::multiplies<T>(), queue);
  342. }
  343. template <typename T, typename A>
  344. ublas::vector<T, A> element_prod(ublas::vector<T, A> const &a,
  345. ublas::vector<T, A> const &b,
  346. compute::command_queue &queue)
  347. {
  348. return element_wise(a, b, compute::multiplies<T>(), queue);
  349. }
  350. template <typename T, typename L>
  351. void element_scale(ublas::matrix<T, L, opencl::storage> const &m, T value,
  352. ublas::matrix<T, L, opencl::storage> &result,
  353. compute::command_queue& queue)
  354. {
  355. assert(m.device() == result.device() && m.device() == queue.get_device());
  356. assert(m.size1() == result.size1() && m.size2() == result.size2());
  357. compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 * value, queue);
  358. queue.finish();
  359. }
  360. template <typename T, typename L, typename A>
  361. void element_scale(ublas::matrix<T, L, A> const &m, T value,
  362. ublas::matrix<T, L, A> &result,
  363. compute::command_queue& queue)
  364. {
  365. ublas::matrix<T, L, opencl::storage> mdev(m, queue);
  366. ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
  367. element_scale(mdev, value, rdev, queue);
  368. rdev.to_host(result, queue);
  369. }
  370. template <typename T, typename L, typename A>
  371. ublas::matrix<T, L, A> element_scale(ublas::matrix<T, L, A> const &m, T value,
  372. compute::command_queue& queue)
  373. {
  374. ublas::matrix<T, L, A> result(m.size1(), m.size2());
  375. element_scale(m, value, result, queue);
  376. return result;
  377. }
  378. template <typename T>
  379. void element_scale(ublas::vector<T, opencl::storage> const &v, T value,
  380. ublas::vector<T, opencl::storage> &result,
  381. compute::command_queue& queue)
  382. {
  383. assert(v.device() == result.device() && v.device() == queue.get_device());
  384. assert(v.size() == result.size());
  385. compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 * value, queue);
  386. queue.finish();
  387. }
  388. template <typename T, typename A>
  389. void element_scale(ublas::vector<T, A> const &v, T value,
  390. ublas::vector<T, A> & result,
  391. compute::command_queue& queue)
  392. {
  393. ublas::vector<T, opencl::storage> vdev(v, queue);
  394. ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
  395. element_scale(vdev, value, rdev, queue);
  396. rdev.to_host(result, queue);
  397. }
  398. template <typename T, typename A>
  399. ublas::vector<T,A> element_scale(ublas::vector<T, A> const &v, T value,
  400. compute::command_queue& queue)
  401. {
  402. ublas::vector<T, A> result(v.size());
  403. element_scale(v, value, result, queue);
  404. return result;
  405. }
  406. template <typename T, typename L1, typename L2, typename L3>
  407. void element_div(ublas::matrix<T, L1, opencl::storage> const &a,
  408. ublas::matrix<T, L2, opencl::storage> const &b,
  409. ublas::matrix<T, L3, opencl::storage> &result,
  410. compute::command_queue& queue)
  411. {
  412. element_wise(a, b, result, compute::divides<T>(), queue);
  413. }
  414. template <typename T, typename L1, typename L2, typename L3, typename A>
  415. void element_div(ublas::matrix<T, L1, A> const &a,
  416. ublas::matrix<T, L2, A> const &b,
  417. ublas::matrix<T, L3, A> &result,
  418. compute::command_queue &queue)
  419. {
  420. element_wise(a, b, result, compute::divides<T>(), queue);
  421. }
  422. template <typename T, typename L1, typename L2, typename A>
  423. ublas::matrix<T, L1, A> element_div(ublas::matrix<T, L1, A> const &a,
  424. ublas::matrix<T, L2, A> const &b,
  425. compute::command_queue &queue)
  426. {
  427. return element_wise(a, b, compute::divides<T>(), queue);
  428. }
  429. template <typename T>
  430. void element_div(ublas::vector<T, opencl::storage> const &a,
  431. ublas::vector<T, opencl::storage> const &b,
  432. ublas::vector<T, opencl::storage> &result,
  433. compute::command_queue& queue)
  434. {
  435. element_wise(a, b, result, compute::divides<T>(), queue);
  436. }
  437. template <typename T, typename A>
  438. void element_div(ublas::vector<T, A> const &a,
  439. ublas::vector<T, A> const &b,
  440. ublas::vector<T, A> &result,
  441. compute::command_queue &queue)
  442. {
  443. element_wise(a, b, result, compute::divides<T>(), queue);
  444. }
  445. template <typename T, typename A>
  446. ublas::vector<T, A> element_div(ublas::vector<T, A> const &a,
  447. ublas::vector<T, A> const &b,
  448. compute::command_queue &queue)
  449. {
  450. return element_wise(a, b, compute::divides<T>(), queue);
  451. }
  452. }}}}
  453. #endif