// Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Message Passing Interface 1.1 -- Section 4. MPI Collectives

/** @file collectives.hpp
 *
 * This header contains MPI collective operations, which implement
 * various parallel algorithms that require the coordination of all
 * processes within a communicator. The header @c collectives_fwd.hpp
 * provides forward declarations for each of these operations. To
 * include only specific collective algorithms, use the headers @c
 * boost/mpi/collectives/algorithm_name.hpp.
 */
#ifndef BOOST_MPI_COLLECTIVES_HPP
#define BOOST_MPI_COLLECTIVES_HPP

#include <boost/mpi/communicator.hpp>
#include <boost/mpi/inplace.hpp>
#include <vector>

namespace boost { namespace mpi {

/**
 * @brief Gather the values stored at every process into vectors of
 * values from each process.
 *
 * @c all_gather is a collective algorithm that collects the values
 * stored at each process into a vector of values indexed by the
 * process number they came from. The type @c T of the values may be
 * any type that is serializable or has an associated MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Allgather to gather the values.
 *
 * @param comm The communicator over which the all-gather will
 * occur.
 *
 * @param in_value The value to be transmitted by each process. To
 * gather an array of values, @c in_values points to the @c n local
 * values to be transmitted.
 *
 * @param out_values A vector or pointer to storage that will be
 * populated with the values from each process, indexed by the
 * process ID number. If it is a vector, the vector will be resized
 * accordingly.
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value,
           std::vector<T>& out_values);
/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value, T* out_values);
/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n,
           std::vector<T>& out_values);
/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n, T* out_values);
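
// Usage sketch for all_gather(), assuming an initialized MPI environment
// (e.g. a program launched with mpirun and linked against boost_mpi):
//
//   #include <boost/mpi.hpp>
//   #include <vector>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     // Every process contributes its rank; every process receives the
//     // full list, so all_ranks[i] == i on each rank afterwards.
//     std::vector<int> all_ranks;
//     boost::mpi::all_gather(world, world.rank(), all_ranks);
//     return 0;
//   }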

/**
 * @brief Combine the values stored by each process into a single
 * value available to all processes.
 *
 * @c all_reduce is a collective algorithm that combines the values
 * stored by each process into a single value available to all
 * processes. The values are combined in a user-defined way,
 * specified via a function object. The type @c T of the values may
 * be any type that is serializable or has an associated MPI data
 * type. One can think of this operation as an @c all_gather, followed
 * by a @c std::accumulate() over the gathered values using the
 * operation @c op.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Allreduce to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c all_reduce()
 * will create a custom @c MPI_Op for the call to @c MPI_Allreduce.
 *
 * @param comm The communicator over which the reduction will
 * occur.
 *
 * @param value The local value to be combined with the local
 * values of every other process. For reducing arrays, @c in_values
 * is a pointer to the local values to be reduced and @c n is the
 * number of values to reduce. See @c reduce for more information.
 *
 * If wrapped in an @c inplace_t object, the input and output share
 * the same storage and the local value will be overwritten (a
 * convenience function @c inplace is provided for the wrapping).
 *
 * @param out_value Will receive the result of the reduction
 * operation. If this parameter is omitted, the outgoing value will
 * instead be returned.
 *
 * @param op The binary operation that combines two values of type
 * @c T and returns a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp): users are
 * encouraged to mark commutative operations as such, because it
 * gives the implementation additional latitude to optimize the
 * reduction operation.
 *
 * @param n Indicates the size of the buffers of array type.
 *
 * @returns If no @p out_value parameter is supplied, returns the
 * result of the reduction operation.
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T* value, int n, T* out_value,
           Op op);
/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T& value, T& out_value, Op op);
/**
 * \overload
 */
template<typename T, typename Op>
T all_reduce(const communicator& comm, const T& value, Op op);
/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, inplace_t<T*> value, int n,
           Op op);
/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, inplace_t<T> value, Op op);
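
// Usage sketch for all_reduce(), assuming an initialized MPI environment
// and std::plus<> as the reduction operation (mapped to MPI_SUM for
// built-in types):
//
//   #include <boost/mpi.hpp>
//   #include <functional>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     // Every process obtains the sum of all ranks, 0 + 1 + ... + (size-1).
//     int sum = boost::mpi::all_reduce(world, world.rank(), std::plus<int>());
//
//     // In-place array variant: each element of data is replaced by the
//     // element-wise sum over all processes.
//     int data[2] = { world.rank(), 2 * world.rank() };
//     boost::mpi::all_reduce(world, boost::mpi::inplace(&data[0]), 2,
//                            std::plus<int>());
//     return 0;
//   }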

/**
 * @brief Send data from every process to every other process.
 *
 * @c all_to_all is a collective algorithm that transmits @c p values
 * from every process to every other process. On process i, the jth value
 * of the @p in_values vector is sent to process j and placed in the
 * ith position of the @p out_values vector on process j. The type
 * @c T of the values may be any type that is serializable or has an
 * associated MPI data type. If @c n is provided, then arrays of @p n
 * values will be transferred from one process to another.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Alltoall to scatter the values.
 *
 * @param comm The communicator over which the all-to-all
 * communication will occur.
 *
 * @param in_values A vector or pointer to storage that contains
 * the values to send to each process, indexed by the process ID
 * number.
 *
 * @param out_values A vector or pointer to storage that will be
 * updated to contain the values received from other processes. The
 * jth value in @p out_values will come from the process with rank j.
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values,
           std::vector<T>& out_values);
/**
 * \overload
 */
template<typename T>
void all_to_all(const communicator& comm, const T* in_values, T* out_values);
/**
 * \overload
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
           std::vector<T>& out_values);
/**
 * \overload
 */
template<typename T>
void
all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);
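
// Usage sketch for all_to_all(), assuming an initialized MPI environment:
//
//   #include <boost/mpi.hpp>
//   #include <vector>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     // outgoing[j] is sent to process j; incoming[j] is received from
//     // process j.
//     std::vector<int> outgoing(world.size()), incoming;
//     for (int j = 0; j < world.size(); ++j)
//       outgoing[j] = 100 * world.rank() + j;
//     boost::mpi::all_to_all(world, outgoing, incoming);
//     // On rank i, incoming[j] == 100 * j + i.
//     return 0;
//   }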

/**
 * @brief Broadcast a value from a root process to all other
 * processes.
 *
 * @c broadcast is a collective algorithm that transfers a value from
 * an arbitrary @p root process to every other process that is part of
 * the given communicator. The @c broadcast algorithm can transmit any
 * Serializable value, values that have associated MPI data types,
 * packed archives, skeletons, and the content of skeletons; see the
 * @c send primitive for communicators for a complete list. The type
 * @c T shall be the same for all processes that are a part of the
 * communicator @p comm, unless packed archives are being transferred:
 * with packed archives, the root sends a @c packed_oarchive or @c
 * packed_skeleton_oarchive whereas the other processes receive a
 * @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Bcast to perform the broadcast.
 *
 * @param comm The communicator over which the broadcast will
 * occur.
 *
 * @param value The value (or values, if @p n is provided) to be
 * transmitted (if the rank of @p comm is equal to @p root) or
 * received (if the rank of @p comm is not equal to @p root). When
 * the @p value is a @c skeleton_proxy, only the skeleton of the
 * object will be broadcast. In this case, the @p root will build a
 * skeleton from the object held in the proxy and all of the
 * non-roots will reshape the objects held in their proxies based on
 * the skeleton sent from the root.
 *
 * @param n When supplied, the number of values that the pointer @p
 * values points to, for broadcasting an array of values. The value
 * of @p n must be the same for all processes in @p comm.
 *
 * @param root The rank/process ID of the process that will be
 * transmitting the value.
 */
template<typename T>
void broadcast(const communicator& comm, T& value, int root);
/**
 * \overload
 */
template<typename T>
void broadcast(const communicator& comm, T* values, int n, int root);
/**
 * \overload
 */
template<typename T>
void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);
/**
 * \overload
 */
template<typename T>
void
broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);
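
// Usage sketch for broadcast(), assuming an initialized MPI environment.
// std::string is serializable once boost/serialization/string.hpp is
// included, so it can be broadcast directly:
//
//   #include <boost/mpi.hpp>
//   #include <boost/serialization/string.hpp>
//   #include <string>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     std::string message;
//     if (world.rank() == 0)
//       message = "hello from the root";
//     // After the call, every process holds the root's value.
//     boost::mpi::broadcast(world, message, 0);
//     return 0;
//   }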

/**
 * @brief Gather the values stored at every process into a vector at
 * the root process.
 *
 * @c gather is a collective algorithm that collects the values
 * stored at each process into a vector of values at the @p root
 * process. This vector is indexed by the process number that the
 * value came from. The type @c T of the values may be any type that
 * is serializable or has an associated MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Gather to gather the values.
 *
 * @param comm The communicator over which the gather will occur.
 *
 * @param in_value The value to be transmitted by each process. For
 * gathering arrays of values, @c in_values points to the @c n local
 * values to be transmitted; at the root, @p out_values must then
 * provide storage for @c n*comm.size() values.
 *
 * @param out_values A vector or pointer to storage that will be
 * populated with the values from each process, indexed by the
 * process ID number. If it is a vector, it will be resized
 * accordingly. For non-root processes, this parameter may be
 * omitted. If it is still provided, however, it will be unchanged.
 *
 * @param root The process ID number that will collect the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
       int root);
/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, T* out_values, int root);
/**
 * \overload
 */
template<typename T>
void gather(const communicator& comm, const T& in_value, int root);
/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n,
       std::vector<T>& out_values, int root);
/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n, T* out_values,
       int root);
/**
 * \overload
 */
template<typename T>
void gather(const communicator& comm, const T* in_values, int n, int root);
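
// Usage sketch for gather(), assuming an initialized MPI environment:
//
//   #include <boost/mpi.hpp>
//   #include <vector>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     if (world.rank() == 0) {
//       // The root collects one value per process; all_ranks[i] == i.
//       std::vector<int> all_ranks;
//       boost::mpi::gather(world, world.rank(), all_ranks, 0);
//     } else {
//       // Non-root processes use the overload without out_values.
//       boost::mpi::gather(world, world.rank(), 0);
//     }
//     return 0;
//   }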

/**
 * @brief Similar to boost::mpi::gather with the difference that the number
 * of values to be sent by non-root processes can vary.
 *
 * @param comm The communicator over which the gather will occur.
 *
 * @param in_values The array of values to be transmitted by each process.
 *
 * @param in_size For each non-root process this specifies the size
 * of @p in_values.
 *
 * @param out_values A pointer to storage that will be populated with
 * the values from each process. For non-root processes, this parameter
 * may be omitted. If it is still provided, however, it will be unchanged.
 *
 * @param sizes A vector containing the number of elements each non-root
 * process will send.
 *
 * @param displs A vector such that the i-th entry specifies the
 * displacement (relative to @p out_values) at which to place the incoming
 * data from process i at the @p root process. Overloaded versions for which
 * @p displs is omitted assume that the data is to be placed contiguously at
 * the root process.
 *
 * @param root The process ID number that will collect the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
gatherv(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, const std::vector<int>& sizes, const std::vector<int>& displs,
        int root);
/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const T* in_values, int in_size,
        T* out_values, const std::vector<int>& sizes, const std::vector<int>& displs,
        int root);
/**
 * \overload
 */
template<typename T>
void gatherv(const communicator& comm, const std::vector<T>& in_values, int root);
/**
 * \overload
 */
template<typename T>
void gatherv(const communicator& comm, const T* in_values, int in_size, int root);
/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const T* in_values, int in_size,
        T* out_values, const std::vector<int>& sizes, int root);
/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, const std::vector<int>& sizes, int root);
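
// Usage sketch for gatherv(), assuming an initialized MPI environment.
// Rank r contributes r+1 values; the root must know the per-process
// sizes in advance (here they follow directly from the rank):
//
//   #include <boost/mpi.hpp>
//   #include <numeric>
//   #include <vector>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     std::vector<int> local(world.rank() + 1, world.rank());
//     if (world.rank() == 0) {
//       std::vector<int> sizes(world.size());
//       for (int r = 0; r < world.size(); ++r)
//         sizes[r] = r + 1;
//       std::vector<int> all(std::accumulate(sizes.begin(), sizes.end(), 0));
//       // Without displs, the incoming data is placed contiguously.
//       boost::mpi::gatherv(world, local, &all[0], sizes, 0);
//     } else {
//       boost::mpi::gatherv(world, local, 0);
//     }
//     return 0;
//   }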

/**
 * @brief Scatter the values stored at the root to all processes
 * within the communicator.
 *
 * @c scatter is a collective algorithm that scatters the values
 * stored in the @p root process (inside a vector) to all of the
 * processes in the communicator. The vector @p in_values (only
 * significant at the @p root) is indexed by the process number to
 * which the corresponding value will be sent. The type @c T of the
 * values may be any type that is serializable or has an associated
 * MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Scatter to scatter the values.
 *
 * @param comm The communicator over which the scatter will occur.
 *
 * @param in_values A vector or pointer to storage that will contain
 * the values to send to each process, indexed by the process rank.
 * For non-root processes, this parameter may be omitted. If it is
 * still provided, however, it will be unchanged.
 *
 * @param out_value The value received by each process. When
 * scattering an array of values, @p out_values points to the @p n
 * values that will be received by each process.
 *
 * @param root The process ID number that will scatter the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values, T& out_value,
        int root);
/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T& out_value, int root);
/**
 * \overload
 */
template<typename T>
void scatter(const communicator& comm, T& out_value, int root);
/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, int n, int root);
/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T* out_values, int n,
        int root);
/**
 * \overload
 */
template<typename T>
void scatter(const communicator& comm, T* out_values, int n, int root);
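
// Usage sketch for scatter(), assuming an initialized MPI environment:
//
//   #include <boost/mpi.hpp>
//   #include <vector>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     std::vector<int> in_values;
//     if (world.rank() == 0) {
//       // in_values[i] is delivered to process i.
//       in_values.resize(world.size());
//       for (int i = 0; i < world.size(); ++i)
//         in_values[i] = i * i;
//     }
//     int out_value = 0;
//     boost::mpi::scatter(world, in_values, out_value, 0);
//     // On rank r, out_value == r * r.
//     return 0;
//   }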

/**
 * @brief Similar to boost::mpi::scatter with the difference that the number
 * of values stored at the root process does not need to be a multiple of
 * the communicator's size.
 *
 * @param comm The communicator over which the scatter will occur.
 *
 * @param in_values A vector or pointer to storage that will contain
 * the values to send to each process, indexed by the process rank.
 * For non-root processes, this parameter may be omitted. If it is
 * still provided, however, it will be unchanged.
 *
 * @param sizes A vector containing the number of elements each non-root
 * process will receive.
 *
 * @param displs A vector such that the i-th entry specifies the
 * displacement (relative to @p in_values) from which to take the outgoing
 * data to process i. Overloaded versions for which @p displs is omitted
 * assume that the data is contiguous at the @p root process.
 *
 * @param out_values The array of values received by each process.
 *
 * @param out_size For each non-root process this will contain the size
 * of @p out_values.
 *
 * @param root The process ID number that will scatter the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
scatterv(const communicator& comm, const std::vector<T>& in_values,
         const std::vector<int>& sizes, const std::vector<int>& displs,
         T* out_values, int out_size, int root);
/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const T* in_values,
         const std::vector<int>& sizes, const std::vector<int>& displs,
         T* out_values, int out_size, int root);
/**
 * \overload
 */
template<typename T>
void scatterv(const communicator& comm, T* out_values, int out_size, int root);
/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const T* in_values,
         const std::vector<int>& sizes, T* out_values, int root);
/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const std::vector<T>& in_values,
         const std::vector<int>& sizes, T* out_values, int root);
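
// Usage sketch for scatterv(), assuming an initialized MPI environment.
// Process r receives r+1 values, so the root's buffer need not be a
// multiple of the communicator size:
//
//   #include <boost/mpi.hpp>
//   #include <vector>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     int out_size = world.rank() + 1;
//     std::vector<int> out_values(out_size);
//     if (world.rank() == 0) {
//       std::vector<int> sizes(world.size());
//       std::vector<int> in_values;
//       for (int r = 0; r < world.size(); ++r) {
//         sizes[r] = r + 1;
//         in_values.insert(in_values.end(), r + 1, r);  // r+1 copies of r
//       }
//       // Without displs, the outgoing data is taken contiguously.
//       boost::mpi::scatterv(world, in_values, sizes, &out_values[0], 0);
//     } else {
//       boost::mpi::scatterv(world, &out_values[0], out_size, 0);
//     }
//     return 0;
//   }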

/**
 * @brief Combine the values stored by each process into a single
 * value at the root.
 *
 * @c reduce is a collective algorithm that combines the values
 * stored by each process into a single value at the @c root. The
 * values can be combined arbitrarily, specified via a function
 * object. The type @c T of the values may be any type that is
 * serializable or has an associated MPI data type. One can think of
 * this operation as a @c gather to the @p root, followed by a @c
 * std::accumulate() over the gathered values using the operation
 * @c op.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Reduce to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c reduce() will
 * create a custom @c MPI_Op for the call to @c MPI_Reduce.
 *
 * @param comm The communicator over which the reduction will
 * occur.
 *
 * @param in_value The local value to be combined with the local
 * values of every other process. For reducing arrays, @c in_values
 * contains a pointer to the local values. In this case, @c n is
 * the number of values that will be reduced. Reduction occurs
 * independently for each of the @p n values referenced by @p
 * in_values, e.g., calling reduce on an array of @p n values is
 * like calling @c reduce @p n separate times, one for each
 * location in @p in_values and @p out_values.
 *
 * @param out_value Will receive the result of the reduction
 * operation, but only for the @p root process. Non-root processes
 * may omit this parameter; if they choose to supply the parameter,
 * it will be unchanged. For reducing arrays, @c out_values
 * contains a pointer to the storage for the output values.
 *
 * @param op The binary operation that combines two values of type
 * @c T into a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp): users are
 * encouraged to mark commutative operations as such, because it
 * gives the implementation additional latitude to optimize the
 * reduction operation.
 *
 * @param root The process ID number that will receive the final,
 * combined value. This value must be the same on all processes.
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
       int root);
/**
 * \overload
 */
template<typename T, typename Op>
void reduce(const communicator& comm, const T& in_value, Op op, int root);
/**
 * \overload
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, T* out_values,
       Op op, int root);
/**
 * \overload
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, Op op, int root);
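
// Usage sketch for reduce(), assuming an initialized MPI environment and
// using boost::mpi::maximum<> (from operations.hpp) as the operation:
//
//   #include <boost/mpi.hpp>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     int value = world.rank() % 7;  // some per-process value
//     if (world.rank() == 0) {
//       int max_value = 0;
//       // Only the root receives the combined result.
//       boost::mpi::reduce(world, value, max_value,
//                          boost::mpi::maximum<int>(), 0);
//     } else {
//       // Non-root processes use the overload without out_value.
//       boost::mpi::reduce(world, value, boost::mpi::maximum<int>(), 0);
//     }
//     return 0;
//   }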

/**
 * @brief Compute a prefix reduction of values from all processes in
 * the communicator.
 *
 * @c scan is a collective algorithm that combines the values stored
 * by each process with the values of all processes with a smaller
 * rank. The values can be arbitrarily combined, specified via a
 * function object @p op. The type @c T of the values may be any type
 * that is serializable or has an associated MPI data type. One can
 * think of this operation as a @c gather to some process, followed
 * by a @c std::partial_sum() over the gathered values using the
 * operation @c op. The ith process returns the ith value emitted by
 * @c std::partial_sum().
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Scan to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c scan() will
 * create a custom @c MPI_Op for the call to @c MPI_Scan.
 *
 * @param comm The communicator over which the prefix reduction
 * will occur.
 *
 * @param in_value The local value to be combined with the local
 * values of other processes. For the array variant, the @c
 * in_values parameter points to the @c n local values that will be
 * combined.
 *
 * @param out_value If provided, the ith process will receive the
 * value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
 * ... )). For the array variant, @c out_values contains a pointer
 * to storage for the @c n output values. The prefix reduction
 * occurs independently for each of the @p n values referenced by
 * @p in_values, e.g., calling scan on an array of @p n values is
 * like calling @c scan @p n separate times, one for each location
 * in @p in_values and @p out_values.
 *
 * @param op The binary operation that combines two values of type
 * @c T into a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp).
 *
 * @returns If no @p out_value parameter is provided, returns the
 * result of the prefix reduction.
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T& in_value, T& out_value, Op op);
/**
 * \overload
 */
template<typename T, typename Op>
T
scan(const communicator& comm, const T& in_value, Op op);
/**
 * \overload
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);
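
// Usage sketch for scan(), assuming an initialized MPI environment:
//
//   #include <boost/mpi.hpp>
//   #include <functional>
//
//   int main(int argc, char* argv[])
//   {
//     boost::mpi::environment env(argc, argv);
//     boost::mpi::communicator world;
//
//     // Inclusive prefix sum: rank i receives 0 + 1 + ... + i.
//     int prefix = boost::mpi::scan(world, world.rank(), std::plus<int>());
//     return 0;
//   }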

} } // end namespace boost::mpi

#endif // BOOST_MPI_COLLECTIVES_HPP

#ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
// Include implementations of each of the collectives
# include <boost/mpi/collectives/all_gather.hpp>
# include <boost/mpi/collectives/all_reduce.hpp>
# include <boost/mpi/collectives/all_to_all.hpp>
# include <boost/mpi/collectives/broadcast.hpp>
# include <boost/mpi/collectives/gather.hpp>
# include <boost/mpi/collectives/gatherv.hpp>
# include <boost/mpi/collectives/scatter.hpp>
# include <boost/mpi/collectives/scatterv.hpp>
# include <boost/mpi/collectives/reduce.hpp>
# include <boost/mpi/collectives/scan.hpp>
#endif