test_scan.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  // Undefining the BOOST_COMPUTE_USE_OFFLINE_CACHE macro, as we want to modify
  // cached parameters of the scan algorithms without any undesirable
  // consequences (like saving the modified values of those parameters).
  13. #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
  14. #undef BOOST_COMPUTE_USE_OFFLINE_CACHE
  15. #endif
  16. #define BOOST_TEST_MODULE TestScan
  17. #include <boost/test/unit_test.hpp>
  18. #include <numeric>
  19. #include <functional>
  20. #include <vector>
  21. #include <boost/compute/functional.hpp>
  22. #include <boost/compute/lambda.hpp>
  23. #include <boost/compute/system.hpp>
  24. #include <boost/compute/command_queue.hpp>
  25. #include <boost/compute/algorithm/copy.hpp>
  26. #include <boost/compute/algorithm/exclusive_scan.hpp>
  27. #include <boost/compute/algorithm/inclusive_scan.hpp>
  28. #include <boost/compute/container/vector.hpp>
  29. #include <boost/compute/iterator/counting_iterator.hpp>
  30. #include <boost/compute/iterator/transform_iterator.hpp>
  31. #include "check_macros.hpp"
  32. #include "context_setup.hpp"
  33. namespace bc = boost::compute;
  34. BOOST_AUTO_TEST_CASE(inclusive_scan_int)
  35. {
  36. using boost::compute::uint_;
  37. using boost::compute::int_;
  38. int_ data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
  39. bc::vector<int_> vector(data, data + 12, queue);
  40. BOOST_CHECK_EQUAL(vector.size(), size_t(12));
  41. bc::vector<int_> result(12, context);
  42. BOOST_CHECK_EQUAL(result.size(), size_t(12));
  43. // inclusive scan
  44. bc::inclusive_scan(vector.begin(), vector.end(), result.begin(), queue);
  45. CHECK_RANGE_EQUAL(int_, 12, result, (0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66));
  46. // in-place inclusive scan
  47. CHECK_RANGE_EQUAL(int_, 12, vector, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
  48. bc::inclusive_scan(vector.begin(), vector.end(), vector.begin(), queue);
  49. CHECK_RANGE_EQUAL(int_, 12, vector, (0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66));
  50. // scan_on_cpu
  51. bc::copy(data, data + 12, vector.begin(), queue);
  52. // make sure parallel scan_on_cpu is used, no serial_scan
  53. std::string cache_key =
  54. "__boost_scan_cpu_4";
  55. boost::shared_ptr<bc::detail::parameter_cache> parameters =
  56. bc::detail::parameter_cache::get_global_cache(device);
  57. // save
  58. uint_ map_copy_threshold =
  59. parameters->get(cache_key, "serial_scan_threshold", 0);
  60. // force parallel scan_on_cpu
  61. parameters->set(cache_key, "serial_scan_threshold", 0);
  62. // inclusive scan
  63. bc::inclusive_scan(vector.begin(), vector.end(), result.begin(), queue);
  64. CHECK_RANGE_EQUAL(int_, 12, result, (0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66));
  65. // in-place inclusive scan
  66. CHECK_RANGE_EQUAL(int_, 12, vector, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
  67. bc::inclusive_scan(vector.begin(), vector.end(), vector.begin(), queue);
  68. CHECK_RANGE_EQUAL(int_, 12, vector, (0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66));
  69. // restore
  70. parameters->set(cache_key, "serial_scan_threshold", map_copy_threshold);
  71. }
  72. BOOST_AUTO_TEST_CASE(exclusive_scan_int)
  73. {
  74. using boost::compute::uint_;
  75. using boost::compute::int_;
  76. int_ data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
  77. bc::vector<int_> vector(data, data + 12, queue);
  78. BOOST_CHECK_EQUAL(vector.size(), size_t(12));
  79. bc::vector<int_> result(size_t(12), int_(0), queue);
  80. BOOST_CHECK_EQUAL(result.size(), size_t(12));
  81. // exclusive scan
  82. bc::exclusive_scan(vector.begin(), vector.end(), result.begin(), queue);
  83. CHECK_RANGE_EQUAL(int_, 12, result, (0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55));
  84. // in-place exclusive scan
  85. CHECK_RANGE_EQUAL(int_, 12, vector, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
  86. bc::exclusive_scan(vector.begin(), vector.end(), vector.begin(), queue);
  87. CHECK_RANGE_EQUAL(int_, 12, vector, (0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55));
  88. // scan_on_cpu
  89. bc::copy(data, data + 12, vector.begin(), queue);
  90. // make sure parallel scan_on_cpu is used, no serial_scan
  91. std::string cache_key =
  92. "__boost_scan_cpu_4";
  93. boost::shared_ptr<bc::detail::parameter_cache> parameters =
  94. bc::detail::parameter_cache::get_global_cache(device);
  95. // save
  96. uint_ map_copy_threshold =
  97. parameters->get(cache_key, "serial_scan_threshold", 0);
  98. // force parallel scan_on_cpu
  99. parameters->set(cache_key, "serial_scan_threshold", 0);
  100. // exclusive scan
  101. bc::exclusive_scan(vector.begin(), vector.end(), result.begin(), queue);
  102. CHECK_RANGE_EQUAL(int_, 12, result, (0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55));
  103. // in-place exclusive scan
  104. CHECK_RANGE_EQUAL(int_, 12, vector, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
  105. bc::exclusive_scan(vector.begin(), vector.end(), vector.begin(), queue);
  106. CHECK_RANGE_EQUAL(int_, 12, vector, (0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55));
  107. // restore
  108. parameters->set(cache_key, "serial_scan_threshold", map_copy_threshold);
  109. }
  110. BOOST_AUTO_TEST_CASE(inclusive_scan_int2)
  111. {
  112. using boost::compute::int_;
  113. using boost::compute::uint_;
  114. using boost::compute::int2_;
  115. int_ data[] = { 1, 2,
  116. 3, 4,
  117. 5, 6,
  118. 7, 8,
  119. 9, 0 };
  120. boost::compute::vector<int2_> input(reinterpret_cast<int2_*>(data),
  121. reinterpret_cast<int2_*>(data) + 5,
  122. queue);
  123. BOOST_CHECK_EQUAL(input.size(), size_t(5));
  124. boost::compute::vector<int2_> output(5, context);
  125. boost::compute::inclusive_scan(input.begin(), input.end(), output.begin(),
  126. queue);
  127. CHECK_RANGE_EQUAL(
  128. int2_, 5, output,
  129. (int2_(1, 2), int2_(4, 6), int2_(9, 12), int2_(16, 20), int2_(25, 20))
  130. );
  131. // scan_on_cpu
  132. // make sure parallel scan_on_cpu is used, no serial_scan
  133. std::string cache_key =
  134. "__boost_scan_cpu_8";
  135. boost::shared_ptr<bc::detail::parameter_cache> parameters =
  136. bc::detail::parameter_cache::get_global_cache(device);
  137. // save
  138. uint_ map_copy_threshold =
  139. parameters->get(cache_key, "serial_scan_threshold", 0);
  140. // force parallel scan_on_cpu
  141. parameters->set(cache_key, "serial_scan_threshold", 0);
  142. boost::compute::inclusive_scan(input.begin(), input.end(), output.begin(),
  143. queue);
  144. CHECK_RANGE_EQUAL(
  145. int2_, 5, output,
  146. (int2_(1, 2), int2_(4, 6), int2_(9, 12), int2_(16, 20), int2_(25, 20))
  147. );
  148. // restore
  149. parameters->set(cache_key, "serial_scan_threshold", map_copy_threshold);
  150. }
  151. BOOST_AUTO_TEST_CASE(inclusive_scan_counting_iterator)
  152. {
  153. using boost::compute::int_;
  154. using boost::compute::uint_;
  155. bc::vector<int_> result(10, context);
  156. bc::inclusive_scan(bc::make_counting_iterator(1),
  157. bc::make_counting_iterator(11),
  158. result.begin(), queue);
  159. CHECK_RANGE_EQUAL(int_, 10, result, (1, 3, 6, 10, 15, 21, 28, 36, 45, 55));
  160. // scan_on_cpu
  161. // make sure parallel scan_on_cpu is used, no serial_scan
  162. std::string cache_key =
  163. "__boost_scan_cpu_4";
  164. boost::shared_ptr<bc::detail::parameter_cache> parameters =
  165. bc::detail::parameter_cache::get_global_cache(device);
  166. // save
  167. uint_ map_copy_threshold =
  168. parameters->get(cache_key, "serial_scan_threshold", 0);
  169. // force parallel scan_on_cpu
  170. parameters->set(cache_key, "serial_scan_threshold", 0);
  171. bc::inclusive_scan(bc::make_counting_iterator(1),
  172. bc::make_counting_iterator(11),
  173. result.begin(), queue);
  174. CHECK_RANGE_EQUAL(int_, 10, result, (1, 3, 6, 10, 15, 21, 28, 36, 45, 55));
  175. // restore
  176. parameters->set(cache_key, "serial_scan_threshold", map_copy_threshold);
  177. }
  178. BOOST_AUTO_TEST_CASE(exclusive_scan_counting_iterator)
  179. {
  180. using boost::compute::int_;
  181. using boost::compute::uint_;
  182. bc::vector<int_> result(10, context);
  183. bc::exclusive_scan(bc::make_counting_iterator(1),
  184. bc::make_counting_iterator(11),
  185. result.begin(), queue);
  186. CHECK_RANGE_EQUAL(int_, 10, result, (0, 1, 3, 6, 10, 15, 21, 28, 36, 45));
  187. // scan_on_cpu
  188. // make sure parallel scan_on_cpu is used, no serial_scan
  189. std::string cache_key =
  190. "__boost_scan_cpu_4";
  191. boost::shared_ptr<bc::detail::parameter_cache> parameters =
  192. bc::detail::parameter_cache::get_global_cache(device);
  193. // save
  194. uint_ map_copy_threshold =
  195. parameters->get(cache_key, "serial_scan_threshold", 0);
  196. // force parallel scan_on_cpu
  197. parameters->set(cache_key, "serial_scan_threshold", 0);
  198. bc::exclusive_scan(bc::make_counting_iterator(1),
  199. bc::make_counting_iterator(11),
  200. result.begin(), queue);
  201. CHECK_RANGE_EQUAL(int_, 10, result, (0, 1, 3, 6, 10, 15, 21, 28, 36, 45));
  202. // restore
  203. parameters->set(cache_key, "serial_scan_threshold", map_copy_threshold);
  204. }
  205. BOOST_AUTO_TEST_CASE(inclusive_scan_transform_iterator)
  206. {
  207. float data[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
  208. bc::vector<float> input(data, data + 5, queue);
  209. bc::vector<float> output(5, context);
  210. // normal inclusive scan of the input
  211. bc::inclusive_scan(input.begin(), input.end(), output.begin(), queue);
  212. queue.finish();
  213. BOOST_CHECK_CLOSE(float(output[0]), 1.0f, 1e-4f);
  214. BOOST_CHECK_CLOSE(float(output[1]), 3.0f, 1e-4f);
  215. BOOST_CHECK_CLOSE(float(output[2]), 6.0f, 1e-4f);
  216. BOOST_CHECK_CLOSE(float(output[3]), 10.0f, 1e-4f);
  217. BOOST_CHECK_CLOSE(float(output[4]), 15.0f, 1e-4f);
  218. // inclusive scan of squares of the input
  219. using ::boost::compute::_1;
  220. bc::inclusive_scan(bc::make_transform_iterator(input.begin(), pown(_1, 2)),
  221. bc::make_transform_iterator(input.end(), pown(_1, 2)),
  222. output.begin(), queue);
  223. queue.finish();
  224. BOOST_CHECK_CLOSE(float(output[0]), 1.0f, 1e-4f);
  225. BOOST_CHECK_CLOSE(float(output[1]), 5.0f, 1e-4f);
  226. BOOST_CHECK_CLOSE(float(output[2]), 14.0f, 1e-4f);
  227. BOOST_CHECK_CLOSE(float(output[3]), 30.0f, 1e-4f);
  228. BOOST_CHECK_CLOSE(float(output[4]), 55.0f, 1e-4f);
  229. }
  230. BOOST_AUTO_TEST_CASE(inclusive_scan_doctest)
  231. {
  232. //! [inclusive_scan_int]
  233. // setup input
  234. int data[] = { 1, 2, 3, 4 };
  235. boost::compute::vector<int> input(data, data + 4, queue);
  236. // setup output
  237. boost::compute::vector<int> output(4, context);
  238. // scan values
  239. boost::compute::inclusive_scan(
  240. input.begin(), input.end(), output.begin(), queue
  241. );
  242. // output = [ 1, 3, 6, 10 ]
  243. //! [inclusive_scan_int]
  244. CHECK_RANGE_EQUAL(int, 4, output, (1, 3, 6, 10));
  245. }
  246. BOOST_AUTO_TEST_CASE(exclusive_scan_doctest)
  247. {
  248. //! [exclusive_scan_int]
  249. // setup input
  250. int data[] = { 1, 2, 3, 4 };
  251. boost::compute::vector<int> input(data, data + 4, queue);
  252. // setup output
  253. boost::compute::vector<int> output(4, context);
  254. // scan values
  255. boost::compute::exclusive_scan(
  256. input.begin(), input.end(), output.begin(), queue
  257. );
  258. // output = [ 0, 1, 3, 6 ]
  259. //! [exclusive_scan_int]
  260. CHECK_RANGE_EQUAL(int, 4, output, (0, 1, 3, 6));
  261. }
  262. BOOST_AUTO_TEST_CASE(inclusive_scan_int_multiplies)
  263. {
  264. //! [inclusive_scan_int_multiplies]
  265. // setup input
  266. int data[] = { 1, 2, 1, 2, 3 };
  267. boost::compute::vector<int> input(data, data + 5, queue);
  268. // setup output
  269. boost::compute::vector<int> output(5, context);
  270. // inclusive scan with multiplication
  271. boost::compute::inclusive_scan(
  272. input.begin(), input.end(), output.begin(),
  273. boost::compute::multiplies<int>(), queue
  274. );
  275. // output = [1, 2, 2, 4, 12]
  276. //! [inclusive_scan_int_multiplies]
  277. BOOST_CHECK_EQUAL(input.size(), size_t(5));
  278. BOOST_CHECK_EQUAL(output.size(), size_t(5));
  279. CHECK_RANGE_EQUAL(int, 5, output, (1, 2, 2, 4, 12));
  280. // in-place inclusive scan
  281. CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 1, 2, 3));
  282. boost::compute::inclusive_scan(input.begin(), input.end(), input.begin(),
  283. boost::compute::multiplies<int>(), queue);
  284. CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 2, 4, 12));
  285. }
  286. BOOST_AUTO_TEST_CASE(exclusive_scan_int_multiplies)
  287. {
  288. //! [exclusive_scan_int_multiplies]
  289. // setup input
  290. int data[] = { 1, 2, 1, 2, 3 };
  291. boost::compute::vector<int> input(data, data + 5, queue);
  292. // setup output
  293. boost::compute::vector<int> output(5, context);
  294. // exclusive_scan with multiplication
  295. // initial value equals 10
  296. boost::compute::exclusive_scan(
  297. input.begin(), input.end(), output.begin(),
  298. int(10), boost::compute::multiplies<int>(), queue
  299. );
  300. // output = [10, 10, 20, 20, 40]
  301. //! [exclusive_scan_int_multiplies]
  302. BOOST_CHECK_EQUAL(input.size(), size_t(5));
  303. BOOST_CHECK_EQUAL(output.size(), size_t(5));
  304. CHECK_RANGE_EQUAL(int, 5, output, (10, 10, 20, 20, 40));
  305. // in-place exclusive scan
  306. CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 1, 2, 3));
  307. bc::exclusive_scan(input.begin(), input.end(), input.begin(),
  308. int(10), bc::multiplies<int>(), queue);
  309. CHECK_RANGE_EQUAL(int, 5, input, (10, 10, 20, 20, 40));
  310. }
  311. BOOST_AUTO_TEST_CASE(inclusive_scan_int_multiplies_long_vector)
  312. {
  313. size_t size = 1000;
  314. bc::vector<int> device_vector(size, int(2), queue);
  315. BOOST_CHECK_EQUAL(device_vector.size(), size);
  316. bc::inclusive_scan(device_vector.begin(), device_vector.end(),
  317. device_vector.begin(), bc::multiplies<int>(), queue);
  318. std::vector<int> host_vector(size, 2);
  319. BOOST_CHECK_EQUAL(host_vector.size(), size);
  320. bc::copy(device_vector.begin(), device_vector.end(),
  321. host_vector.begin(), queue);
  322. std::vector<int> test(size, 2);
  323. BOOST_CHECK_EQUAL(test.size(), size);
  324. std::partial_sum(test.begin(), test.end(),
  325. test.begin(), std::multiplies<int>());
  326. BOOST_CHECK_EQUAL_COLLECTIONS(host_vector.begin(), host_vector.end(),
  327. test.begin(), test.end());
  328. }
  329. BOOST_AUTO_TEST_CASE(exclusive_scan_int_multiplies_long_vector)
  330. {
  331. size_t size = 1000;
  332. bc::vector<int> device_vector(size, int(2), queue);
  333. BOOST_CHECK_EQUAL(device_vector.size(), size);
  334. bc::exclusive_scan(device_vector.begin(), device_vector.end(),
  335. device_vector.begin(), int(10), bc::multiplies<int>(),
  336. queue);
  337. std::vector<int> host_vector(size, 2);
  338. BOOST_CHECK_EQUAL(host_vector.size(), size);
  339. bc::copy(device_vector.begin(), device_vector.end(),
  340. host_vector.begin(), queue);
  341. std::vector<int> test(size, 2);
  342. BOOST_CHECK_EQUAL(test.size(), size);
  343. test[0] = 10;
  344. std::partial_sum(test.begin(), test.end(),
  345. test.begin(), std::multiplies<int>());
  346. BOOST_CHECK_EQUAL_COLLECTIONS(host_vector.begin(), host_vector.end(),
  347. test.begin(), test.end());
  348. }
  349. BOOST_AUTO_TEST_CASE(inclusive_scan_int_custom_function)
  350. {
  351. BOOST_COMPUTE_FUNCTION(int, multi, (int x, int y),
  352. {
  353. return x * y * 2;
  354. });
  355. int data[] = { 1, 2, 1, 2, 3 };
  356. bc::vector<int> vector(data, data + 5, queue);
  357. BOOST_CHECK_EQUAL(vector.size(), size_t(5));
  358. bc::vector<int> result(5, context);
  359. BOOST_CHECK_EQUAL(result.size(), size_t(5));
  360. // inclusive scan
  361. bc::inclusive_scan(vector.begin(), vector.end(), result.begin(),
  362. multi, queue);
  363. CHECK_RANGE_EQUAL(int, 5, result, (1, 4, 8, 32, 192));
  364. // in-place inclusive scan
  365. CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 1, 2, 3));
  366. bc::inclusive_scan(vector.begin(), vector.end(), vector.begin(),
  367. multi, queue);
  368. CHECK_RANGE_EQUAL(int, 5, vector, (1, 4, 8, 32, 192));
  369. }
  370. BOOST_AUTO_TEST_CASE(exclusive_scan_int_custom_function)
  371. {
  372. BOOST_COMPUTE_FUNCTION(int, multi, (int x, int y),
  373. {
  374. return x * y * 2;
  375. });
  376. int data[] = { 1, 2, 1, 2, 3 };
  377. bc::vector<int> vector(data, data + 5, queue);
  378. BOOST_CHECK_EQUAL(vector.size(), size_t(5));
  379. bc::vector<int> result(5, context);
  380. BOOST_CHECK_EQUAL(result.size(), size_t(5));
  381. // exclusive_scan
  382. bc::exclusive_scan(vector.begin(), vector.end(), result.begin(),
  383. int(1), multi, queue);
  384. CHECK_RANGE_EQUAL(int, 5, result, (1, 2, 8, 16, 64));
  385. // in-place exclusive scan
  386. CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 1, 2, 3));
  387. bc::exclusive_scan(vector.begin(), vector.end(), vector.begin(),
  388. int(1), multi, queue);
  389. CHECK_RANGE_EQUAL(int, 5, vector, (1, 2, 8, 16, 64));
  390. }
  391. BOOST_AUTO_TEST_SUITE_END()