perf_sort.cpp 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

#include <boost/program_options.hpp>

#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/sort.hpp>
#include <boost/compute/algorithm/is_sorted.hpp>
#include <boost/compute/container/vector.hpp>

#include "perf.hpp"
  19. namespace po = boost::program_options;
  20. namespace compute = boost::compute;
  21. template<class T>
  22. double perf_sort(const std::vector<T>& data,
  23. const size_t trials,
  24. compute::command_queue& queue)
  25. {
  26. compute::vector<T> vec(data.size(), queue.get_context());
  27. perf_timer t;
  28. for(size_t trial = 0; trial < trials; trial++){
  29. compute::copy(data.begin(), data.end(), vec.begin(), queue);
  30. t.start();
  31. compute::sort(vec.begin(), vec.end(), queue);
  32. queue.finish();
  33. t.stop();
  34. if(!compute::is_sorted(vec.begin(), vec.end(), queue)){
  35. std::cerr << "ERROR: is_sorted() returned false" << std::endl;
  36. }
  37. }
  38. return t.min_time();
  39. }
  40. template<class T>
  41. void tune_sort(const std::vector<T>& data,
  42. const size_t trials,
  43. compute::command_queue& queue)
  44. {
  45. boost::shared_ptr<compute::detail::parameter_cache>
  46. params = compute::detail::parameter_cache::get_global_cache(queue.get_device());
  47. const std::string cache_key =
  48. std::string("__boost_radix_sort_") + compute::type_name<T>();
  49. const compute::uint_ tpbs[] = { 32, 64, 128, 256, 512, 1024 };
  50. double min_time = (std::numeric_limits<double>::max)();
  51. compute::uint_ best_tpb = 0;
  52. for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){
  53. params->set(cache_key, "tpb", tpbs[i]);
  54. try {
  55. const double t = perf_sort(data, trials, queue);
  56. if(t < min_time){
  57. best_tpb = tpbs[i];
  58. min_time = t;
  59. }
  60. }
  61. catch(compute::opencl_error&){
  62. // invalid work group size for this device, skip
  63. }
  64. }
  65. // store optimal parameters
  66. params->set(cache_key, "tpb", best_tpb);
  67. }
  68. int main(int argc, char *argv[])
  69. {
  70. // setup command line arguments
  71. po::options_description options("options");
  72. options.add_options()
  73. ("help", "show usage instructions")
  74. ("size", po::value<size_t>()->default_value(8192), "input size")
  75. ("trials", po::value<size_t>()->default_value(3), "number of trials to run")
  76. ("tune", "run tuning procedure")
  77. ;
  78. po::positional_options_description positional_options;
  79. positional_options.add("size", 1);
  80. // parse command line
  81. po::variables_map vm;
  82. po::store(
  83. po::command_line_parser(argc, argv)
  84. .options(options).positional(positional_options).run(),
  85. vm
  86. );
  87. po::notify(vm);
  88. const size_t size = vm["size"].as<size_t>();
  89. const size_t trials = vm["trials"].as<size_t>();
  90. std::cout << "size: " << size << std::endl;
  91. // setup context and queue for the default device
  92. compute::device device = boost::compute::system::default_device();
  93. compute::context context(device);
  94. compute::command_queue queue(context, device);
  95. std::cout << "device: " << device.name() << std::endl;
  96. // create vector of random numbers on the host
  97. std::vector<unsigned int> data(size);
  98. std::generate(data.begin(), data.end(), rand);
  99. // run tuning proceure (if requested)
  100. if(vm.count("tune")){
  101. tune_sort(data, trials, queue);
  102. }
  103. // run sort benchmark
  104. double t = perf_sort(data, trials, queue);
  105. std::cout << "time: " << t / 1e6 << " ms" << std::endl;
  106. return 0;
  107. }