parallel_sort.hpp 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. //----------------------------------------------------------------------------
  2. /// @file parallel_sort.hpp
  3. /// @brief Contains the parallel_sort class, which is part of the
  4. /// block_indirect_sort algorithm
  5. ///
  6. /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
  7. /// Distributed under the Boost Software License, Version 1.0.\n
  8. /// ( See accompanying file LICENSE_1_0.txt or copy at
  9. /// http://www.boost.org/LICENSE_1_0.txt )
  10. /// @version 0.1
  11. ///
  12. /// @remarks
  13. //-----------------------------------------------------------------------------
  14. #ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
  15. #define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
  16. #include <boost/sort/block_indirect_sort/blk_detail/backbone.hpp>
  17. #include <boost/sort/pdqsort/pdqsort.hpp>
  18. #include <boost/sort/common/pivot.hpp>
  19. namespace boost
  20. {
  21. namespace sort
  22. {
  23. namespace blk_detail
  24. {
  25. //----------------------------------------------------------------------------
  26. // USING SENTENCES
  27. //----------------------------------------------------------------------------
  28. namespace bsc = boost::sort::common;
  29. namespace bscu = bsc::util;
  30. using bscu::nbits64;
  31. using bsc::pivot9;
  32. using boost::sort::pdqsort;
  33. //
  34. ///---------------------------------------------------------------------------
  35. /// @struct parallel_sort
  36. /// @brief This class do a parallel sort, using the quicksort filtering,
  37. /// splitting the data until the number of elements is smaller than a
  38. /// predefined value (max_per_thread)
  39. //----------------------------------------------------------------------------
  40. template<uint32_t Block_size, class Iter_t, class Compare>
  41. struct parallel_sort
  42. {
  43. //-------------------------------------------------------------------------
  44. // D E F I N I T I O N S
  45. //-------------------------------------------------------------------------
  46. typedef typename std::iterator_traits<Iter_t>::value_type value_t;
  47. typedef std::atomic<uint32_t> atomic_t;
  48. typedef std::function<void(void)> function_t;
  49. typedef backbone<Block_size, Iter_t, Compare> backbone_t;
  50. //------------------------------------------------------------------------
  51. // V A R I A B L E S
  52. //------------------------------------------------------------------------
  53. // reference to a object with all the data to sort
  54. backbone_t &bk;
  55. // maximun number of element to sort woth 1 thread
  56. size_t max_per_thread;
  57. // atomic counter for to detect the end of the works created inside
  58. // the object
  59. atomic_t counter;
  60. //------------------------------------------------------------------------
  61. // F U N C T I O N S
  62. //------------------------------------------------------------------------
  63. parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last);
  64. void divide_sort(Iter_t first, Iter_t last, uint32_t level);
  65. //
  66. //------------------------------------------------------------------------
  67. // function : function_divide_sort
  68. /// @brief create a function_t with a call to divide_sort, and inser in
  69. /// the stack of the backbone
  70. //
  71. /// @param first : iterator to the first element of the range to divide
  72. /// @param last : iterator to the next element after the last element of
  73. /// the range to divide
  74. /// @param level : level of depth in the division.When zero call to
  75. /// pdqsort
  76. /// @param counter : atomic variable which is decremented when finish
  77. /// the function. This variable is used for to know
  78. /// when are finished all the function_t created
  79. /// inside an object
  80. /// @param error : global indicator of error.
  81. //------------------------------------------------------------------------
  82. void function_divide_sort(Iter_t first, Iter_t last, uint32_t level,
  83. atomic_t &counter, bool &error)
  84. {
  85. bscu::atomic_add(counter, 1);
  86. function_t f1 = [this, first, last, level, &counter, &error]( )
  87. {
  88. if (not error)
  89. {
  90. try
  91. {
  92. this->divide_sort (first, last, level);
  93. }
  94. catch (std::bad_alloc &)
  95. {
  96. error = true;
  97. };
  98. };
  99. bscu::atomic_sub (counter, 1);
  100. };
  101. bk.works.emplace_back(f1);
  102. };
  103. //--------------------------------------------------------------------------
  104. };// end struct parallel_sort
  105. //--------------------------------------------------------------------------
  106. //
  107. //############################################################################
  108. // ##
  109. // ##
  110. // N O N I N L I N E F U N C T I O N S ##
  111. // ##
  112. // ##
  113. //############################################################################
  114. //
  115. //------------------------------------------------------------------------
  116. // function : parallel_sort
  117. /// @brief constructor of the class
  118. /// @param [in] bkbn : backbone struct with all the information to sort
  119. /// @param [in] first : iterator to the first element to sort
  120. /// @param [in] last : iterator to the next element after the last
  121. //------------------------------------------------------------------------
  122. template<uint32_t Block_size, class Iter_t, class Compare>
  123. parallel_sort<Block_size, Iter_t, Compare>
  124. ::parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last)
  125. : bk(bkbn), counter(0)
  126. {
  127. assert((last - first) >= 0);
  128. size_t nelem = size_t(last - first);
  129. //------------------- check if sort --------------------------------------
  130. bool sorted = true;
  131. for (Iter_t it1 = first, it2 = first + 1;
  132. it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++);
  133. if (sorted) return;
  134. //------------------- check if reverse sort ---------------------------
  135. sorted = true;
  136. for (Iter_t it1 = first, it2 = first + 1;
  137. it2 != last and (sorted = not bk.cmp(*it1, *it2)); it1 = it2++);
  138. if (sorted)
  139. {
  140. size_t nelem2 = nelem >> 1;
  141. Iter_t it1 = first, it2 = last - 1;
  142. for (size_t i = 0; i < nelem2; ++i)
  143. std::swap(*(it1++), *(it2--));
  144. return;
  145. };
  146. //-------------------max_per_thread ---------------------------
  147. uint32_t nbits_size = (nbits64(sizeof(value_t))) >> 1;
  148. if (nbits_size > 5) nbits_size = 5;
  149. max_per_thread = 1 << (18 - nbits_size);
  150. uint32_t level = ((nbits64(nelem / max_per_thread)) * 3) / 2;
  151. //---------------- check if only single thread -----------------------
  152. if (nelem < (max_per_thread))
  153. {
  154. pdqsort(first, last, bk.cmp);
  155. return;
  156. };
  157. if (not bk.error) divide_sort(first, last, level);
  158. // wait until all the parts are finished
  159. bk.exec(counter);
  160. };
  161. //------------------------------------------------------------------------
  162. // function : divide_sort
  163. /// @brief this function divide the data in two part, for to be sorted in
  164. /// a parallel mode
  165. /// @param first : iterator to the first element to sort
  166. /// @param last : iterator to the next element after the last
  167. /// @param level : level of depth before call to pdqsort
  168. //------------------------------------------------------------------------
  169. template<uint32_t Block_size, class Iter_t, class Compare>
  170. void parallel_sort<Block_size, Iter_t, Compare>
  171. ::divide_sort(Iter_t first, Iter_t last, uint32_t level)
  172. {
  173. //------------------- check if sort -----------------------------------
  174. bool sorted = true;
  175. for (Iter_t it1 = first, it2 = first + 1;
  176. it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++);
  177. if (sorted) return;
  178. //---------------- check if finish the subdivision -------------------
  179. size_t nelem = last - first;
  180. if (level == 0 or nelem < (max_per_thread))
  181. {
  182. return pdqsort(first, last, bk.cmp);
  183. };
  184. //-------------------- pivoting ----------------------------------
  185. pivot9(first, last, bk.cmp);
  186. const value_t &val = const_cast<value_t &>(*first);
  187. Iter_t c_first = first + 1, c_last = last - 1;
  188. while (bk.cmp(*c_first, val)) ++c_first;
  189. while (bk.cmp(val, *c_last)) --c_last;
  190. while (c_first < c_last)
  191. {
  192. std::swap(*(c_first++), *(c_last--));
  193. while (bk.cmp(*c_first, val))
  194. ++c_first;
  195. while (bk.cmp(val, *c_last))
  196. --c_last;
  197. };
  198. std::swap(*first, *c_last);
  199. // insert the work of the second half in the stack of works
  200. function_divide_sort(c_first, last, level - 1, counter, bk.error);
  201. if (bk.error) return;
  202. // The first half is done by the same thread
  203. function_divide_sort(first, c_last, level - 1, counter, bk.error);
  204. };
  205. //
  206. //****************************************************************************
  207. };// End namespace blk_detail
  208. };// End namespace sort
  209. };// End namespace boost
  210. //****************************************************************************
  211. //
  212. #endif