perf_tbb_merge.cpp 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2014 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #include <algorithm>
  11. #include <iostream>
  12. #include <vector>
  13. #include <tbb/parallel_for.h>
  14. #include "perf.hpp"
  15. // example from: http://www.threadingbuildingblocks.org/docs/help/reference/algorithms/parallel_for_func.htm
  16. using namespace tbb;
  17. template<typename Iterator>
  18. struct ParallelMergeRange {
  19. static size_t grainsize;
  20. Iterator begin1, end1; // [begin1,end1) is 1st sequence to be merged
  21. Iterator begin2, end2; // [begin2,end2) is 2nd sequence to be merged
  22. Iterator out; // where to put merged sequence
  23. bool empty() const {return (end1-begin1)+(end2-begin2)==0;}
  24. bool is_divisible() const {
  25. return (std::min)( end1-begin1, end2-begin2 ) > grainsize;
  26. }
  27. ParallelMergeRange( ParallelMergeRange& r, split ) {
  28. if( r.end1-r.begin1 < r.end2-r.begin2 ) {
  29. std::swap(r.begin1,r.begin2);
  30. std::swap(r.end1,r.end2);
  31. }
  32. Iterator m1 = r.begin1 + (r.end1-r.begin1)/2;
  33. Iterator m2 = std::lower_bound( r.begin2, r.end2, *m1 );
  34. begin1 = m1;
  35. begin2 = m2;
  36. end1 = r.end1;
  37. end2 = r.end2;
  38. out = r.out + (m1-r.begin1) + (m2-r.begin2);
  39. r.end1 = m1;
  40. r.end2 = m2;
  41. }
  42. ParallelMergeRange( Iterator begin1_, Iterator end1_,
  43. Iterator begin2_, Iterator end2_,
  44. Iterator out_ ) :
  45. begin1(begin1_), end1(end1_),
  46. begin2(begin2_), end2(end2_), out(out_)
  47. {}
  48. };
  49. template<typename Iterator>
  50. size_t ParallelMergeRange<Iterator>::grainsize = 1000;
  51. template<typename Iterator>
  52. struct ParallelMergeBody {
  53. void operator()( ParallelMergeRange<Iterator>& r ) const {
  54. std::merge( r.begin1, r.end1, r.begin2, r.end2, r.out );
  55. }
  56. };
  57. template<typename Iterator>
  58. void ParallelMerge( Iterator begin1, Iterator end1, Iterator begin2, Iterator end2, Iterator out ) {
  59. parallel_for(
  60. ParallelMergeRange<Iterator>(begin1,end1,begin2,end2,out),
  61. ParallelMergeBody<Iterator>(),
  62. simple_partitioner()
  63. );
  64. }
  65. int main(int argc, char *argv[])
  66. {
  67. perf_parse_args(argc, argv);
  68. std::cout << "size: " << PERF_N << std::endl;
  69. std::vector<int> v1 = generate_random_vector<int>(PERF_N / 2);
  70. std::vector<int> v2 = generate_random_vector<int>(PERF_N / 2);
  71. std::vector<int> v3(PERF_N);
  72. std::sort(v1.begin(), v1.end());
  73. std::sort(v2.begin(), v2.end());
  74. perf_timer t;
  75. for(size_t trial = 0; trial < PERF_TRIALS; trial++){
  76. t.start();
  77. ParallelMerge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
  78. t.stop();
  79. }
  80. std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
  81. return 0;
  82. }