meta_kernel.hpp 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
  11. #define BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
  12. #include <set>
  13. #include <string>
  14. #include <vector>
  15. #include <iomanip>
  16. #include <sstream>
  17. #include <utility>
  18. #include <boost/tuple/tuple.hpp>
  19. #include <boost/type_traits.hpp>
  20. #include <boost/lexical_cast.hpp>
  21. #include <boost/static_assert.hpp>
  22. #include <boost/algorithm/string/find.hpp>
  23. #include <boost/preprocessor/repetition.hpp>
  24. #include <boost/compute/kernel.hpp>
  25. #include <boost/compute/closure.hpp>
  26. #include <boost/compute/function.hpp>
  27. #include <boost/compute/functional.hpp>
  28. #include <boost/compute/type_traits.hpp>
  29. #include <boost/compute/command_queue.hpp>
  30. #include <boost/compute/image/image2d.hpp>
  31. #include <boost/compute/image/image_sampler.hpp>
  32. #include <boost/compute/memory_object.hpp>
  33. #include <boost/compute/memory/svm_ptr.hpp>
  34. #include <boost/compute/detail/device_ptr.hpp>
  35. #include <boost/compute/detail/sha1.hpp>
  36. #include <boost/compute/utility/program_cache.hpp>
  37. namespace boost {
  38. namespace compute {
  39. namespace detail {
  40. template<class T>
  41. class meta_kernel_variable
  42. {
  43. public:
  44. typedef T result_type;
  45. meta_kernel_variable(const std::string &name)
  46. : m_name(name)
  47. {
  48. }
  49. meta_kernel_variable(const meta_kernel_variable &other)
  50. : m_name(other.m_name)
  51. {
  52. }
  53. meta_kernel_variable& operator=(const meta_kernel_variable &other)
  54. {
  55. if(this != &other){
  56. m_name = other.m_name;
  57. }
  58. return *this;
  59. }
  60. ~meta_kernel_variable()
  61. {
  62. }
  63. std::string name() const
  64. {
  65. return m_name;
  66. }
  67. private:
  68. std::string m_name;
  69. };
  70. template<class T>
  71. class meta_kernel_literal
  72. {
  73. public:
  74. typedef T result_type;
  75. meta_kernel_literal(const T &value)
  76. : m_value(value)
  77. {
  78. }
  79. meta_kernel_literal(const meta_kernel_literal &other)
  80. : m_value(other.m_value)
  81. {
  82. }
  83. meta_kernel_literal& operator=(const meta_kernel_literal &other)
  84. {
  85. if(this != &other){
  86. m_value = other.m_value;
  87. }
  88. return *this;
  89. }
  90. ~meta_kernel_literal()
  91. {
  92. }
  93. const T& value() const
  94. {
  95. return m_value;
  96. }
  97. private:
  98. T m_value;
  99. };
  100. struct meta_kernel_stored_arg
  101. {
  102. meta_kernel_stored_arg()
  103. : m_size(0),
  104. m_value(0)
  105. {
  106. }
  107. meta_kernel_stored_arg(const meta_kernel_stored_arg &other)
  108. : m_size(0),
  109. m_value(0)
  110. {
  111. set_value(other.m_size, other.m_value);
  112. }
  113. meta_kernel_stored_arg& operator=(const meta_kernel_stored_arg &other)
  114. {
  115. if(this != &other){
  116. set_value(other.m_size, other.m_value);
  117. }
  118. return *this;
  119. }
  120. template<class T>
  121. meta_kernel_stored_arg(const T &value)
  122. : m_size(0),
  123. m_value(0)
  124. {
  125. set_value(value);
  126. }
  127. ~meta_kernel_stored_arg()
  128. {
  129. if(m_value){
  130. std::free(m_value);
  131. }
  132. }
  133. void set_value(size_t size, const void *value)
  134. {
  135. if(m_value){
  136. std::free(m_value);
  137. }
  138. m_size = size;
  139. if(value){
  140. m_value = std::malloc(size);
  141. std::memcpy(m_value, value, size);
  142. }
  143. else {
  144. m_value = 0;
  145. }
  146. }
  147. template<class T>
  148. void set_value(const T &value)
  149. {
  150. set_value(sizeof(T), boost::addressof(value));
  151. }
  152. size_t m_size;
  153. void *m_value;
  154. };
  155. struct meta_kernel_buffer_info
  156. {
  157. meta_kernel_buffer_info(const buffer &buffer,
  158. const std::string &id,
  159. memory_object::address_space addr_space,
  160. size_t i)
  161. : m_mem(buffer.get()),
  162. identifier(id),
  163. address_space(addr_space),
  164. index(i)
  165. {
  166. }
  167. cl_mem m_mem;
  168. std::string identifier;
  169. memory_object::address_space address_space;
  170. size_t index;
  171. };
  172. struct meta_kernel_svm_info
  173. {
  174. template <class T>
  175. meta_kernel_svm_info(const svm_ptr<T> ptr,
  176. const std::string &id,
  177. memory_object::address_space addr_space,
  178. size_t i)
  179. : ptr(ptr.get()),
  180. identifier(id),
  181. address_space(addr_space),
  182. index(i)
  183. {
  184. }
  185. void* ptr;
  186. std::string identifier;
  187. memory_object::address_space address_space;
  188. size_t index;
  189. };
  190. class meta_kernel;
  191. template<class Type>
  192. struct inject_type_impl
  193. {
  194. void operator()(meta_kernel &)
  195. {
  196. // default implementation does nothing
  197. }
  198. };
  199. #define BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(type) \
  200. meta_kernel& operator<<(const type &x) \
  201. { \
  202. m_source << x; \
  203. return *this; \
  204. }
  205. #define BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(type) \
  206. meta_kernel& operator<<(const type &x) \
  207. { \
  208. m_source << "(" << type_name<type>() << ")"; \
  209. m_source << "("; \
  210. for(size_t i = 0; i < vector_size<type>::value; i++){ \
  211. *this << lit(x[i]); \
  212. \
  213. if(i != vector_size<type>::value - 1){ \
  214. m_source << ","; \
  215. } \
  216. } \
  217. m_source << ")"; \
  218. return *this; \
  219. }
  220. #define BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(type) \
  221. BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(type, _)) \
  222. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)) \
  223. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)) \
  224. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)) \
  225. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _))
  226. class meta_kernel
  227. {
  228. public:
  229. template<class T>
  230. class argument
  231. {
  232. public:
  233. argument(const std::string &name, size_t index)
  234. : m_name(name),
  235. m_index(index)
  236. {
  237. }
  238. const std::string &name() const
  239. {
  240. return m_name;
  241. }
  242. size_t index() const
  243. {
  244. return m_index;
  245. }
  246. private:
  247. std::string m_name;
  248. size_t m_index;
  249. };
  250. explicit meta_kernel(const std::string &name)
  251. : m_name(name)
  252. {
  253. }
  254. meta_kernel(const meta_kernel &other)
  255. {
  256. m_source.str(other.m_source.str());
  257. m_options = other.m_options;
  258. }
  259. meta_kernel& operator=(const meta_kernel &other)
  260. {
  261. if(this != &other){
  262. m_source.str(other.m_source.str());
  263. m_options = other.m_options;
  264. }
  265. return *this;
  266. }
  267. ~meta_kernel()
  268. {
  269. }
  270. std::string name() const
  271. {
  272. return m_name;
  273. }
  274. std::string source() const
  275. {
  276. std::stringstream stream;
  277. // add pragmas
  278. if(!m_pragmas.empty()){
  279. stream << m_pragmas << "\n";
  280. }
  281. // add macros
  282. stream << "#define boost_pair_type(t1, t2) _pair_ ## t1 ## _ ## t2 ## _t\n";
  283. stream << "#define boost_pair_get(x, n) (n == 0 ? x.first ## x.second)\n";
  284. stream << "#define boost_make_pair(t1, x, t2, y) (boost_pair_type(t1, t2)) { x, y }\n";
  285. stream << "#define boost_tuple_get(x, n) (x.v ## n)\n";
  286. // add type declaration source
  287. stream << m_type_declaration_source.str() << "\n";
  288. // add external function source
  289. stream << m_external_function_source.str() << "\n";
  290. // add kernel source
  291. stream << "__kernel void " << m_name
  292. << "(" << boost::join(m_args, ", ") << ")\n"
  293. << "{\n" << m_source.str() << "\n}\n";
  294. return stream.str();
  295. }
  296. kernel compile(const context &context, const std::string &options = std::string())
  297. {
  298. // generate the program source
  299. std::string source = this->source();
  300. // generate cache key
  301. std::string cache_key = "__boost_meta_kernel_" +
  302. static_cast<std::string>(detail::sha1(source));
  303. // load program cache
  304. boost::shared_ptr<program_cache> cache =
  305. program_cache::get_global_cache(context);
  306. std::string compile_options = m_options + options;
  307. // load (or build) program from cache
  308. ::boost::compute::program program =
  309. cache->get_or_build(cache_key, compile_options, source, context);
  310. // create kernel
  311. ::boost::compute::kernel kernel = program.create_kernel(name());
  312. // bind stored args
  313. for(size_t i = 0; i < m_stored_args.size(); i++){
  314. const detail::meta_kernel_stored_arg &arg = m_stored_args[i];
  315. if(arg.m_size != 0){
  316. kernel.set_arg(i, arg.m_size, arg.m_value);
  317. }
  318. }
  319. // bind buffer args
  320. for(size_t i = 0; i < m_stored_buffers.size(); i++){
  321. const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i];
  322. kernel.set_arg(bi.index, bi.m_mem);
  323. }
  324. // bind svm args
  325. for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){
  326. const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i];
  327. kernel.set_arg_svm_ptr(spi.index, spi.ptr);
  328. }
  329. return kernel;
  330. }
  331. template<class T>
  332. size_t add_arg(const std::string &name)
  333. {
  334. std::stringstream stream;
  335. stream << type<T>() << " " << name;
  336. // add argument to list
  337. m_args.push_back(stream.str());
  338. // return index
  339. return m_args.size() - 1;
  340. }
  341. template<class T>
  342. size_t add_arg(memory_object::address_space address_space,
  343. const std::string &name)
  344. {
  345. return add_arg_with_qualifiers<T>(address_space_prefix(address_space), name);
  346. }
  347. template<class T>
  348. void set_arg(size_t index, const T &value)
  349. {
  350. if(index >= m_stored_args.size()){
  351. m_stored_args.resize(index + 1);
  352. }
  353. m_stored_args[index] = detail::meta_kernel_stored_arg(value);
  354. }
  355. void set_arg(size_t index, const memory_object &mem)
  356. {
  357. set_arg<cl_mem>(index, mem.get());
  358. }
  359. void set_arg(size_t index, const image_sampler &sampler)
  360. {
  361. set_arg<cl_sampler>(index, cl_sampler(sampler));
  362. }
  363. template<class T>
  364. size_t add_set_arg(const std::string &name, const T &value)
  365. {
  366. size_t index = add_arg<T>(name);
  367. set_arg<T>(index, value);
  368. return index;
  369. }
  370. void add_extension_pragma(const std::string &extension,
  371. const std::string &value = "enable")
  372. {
  373. m_pragmas += "#pragma OPENCL EXTENSION " + extension + " : " + value + "\n";
  374. }
  375. void add_extension_pragma(const std::string &extension,
  376. const std::string &value) const
  377. {
  378. return const_cast<meta_kernel *>(this)->add_extension_pragma(extension, value);
  379. }
  380. template<class T>
  381. std::string type() const
  382. {
  383. std::stringstream stream;
  384. // const qualifier
  385. if(boost::is_const<T>::value){
  386. stream << "const ";
  387. }
  388. // volatile qualifier
  389. if(boost::is_volatile<T>::value){
  390. stream << "volatile ";
  391. }
  392. // type
  393. typedef
  394. typename boost::remove_cv<
  395. typename boost::remove_pointer<T>::type
  396. >::type Type;
  397. stream << type_name<Type>();
  398. // pointer
  399. if(boost::is_pointer<T>::value){
  400. stream << "*";
  401. }
  402. // inject type pragmas and/or definitions
  403. inject_type<Type>();
  404. return stream.str();
  405. }
  406. template<class T>
  407. std::string decl(const std::string &name) const
  408. {
  409. return type<T>() + " " + name;
  410. }
  411. template<class T, class Expr>
  412. std::string decl(const std::string &name, const Expr &init) const
  413. {
  414. meta_kernel tmp((std::string()));
  415. tmp << tmp.decl<T>(name) << " = " << init;
  416. return tmp.m_source.str();
  417. }
  418. template<class T>
  419. detail::meta_kernel_variable<T> var(const std::string &name) const
  420. {
  421. type<T>();
  422. return make_var<T>(name);
  423. }
  424. template<class T>
  425. detail::meta_kernel_literal<T> lit(const T &value) const
  426. {
  427. type<T>();
  428. return detail::meta_kernel_literal<T>(value);
  429. }
  430. template<class T>
  431. detail::meta_kernel_variable<T> expr(const std::string &expr) const
  432. {
  433. type<T>();
  434. return detail::meta_kernel_variable<T>(expr);
  435. }
  436. // define stream operators for scalar and vector types
  437. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(char)
  438. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uchar)
  439. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(short)
  440. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ushort)
  441. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(int)
  442. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uint)
  443. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(long)
  444. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ulong)
  445. BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(double)
  446. // define stream operators for float scalar and vector types
  447. meta_kernel& operator<<(const float &x)
  448. {
  449. m_source << std::showpoint << x << 'f';
  450. return *this;
  451. }
  452. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float2_)
  453. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float4_)
  454. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float8_)
  455. BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float16_)
  456. // define stream operators for variable types
  457. template<class T>
  458. meta_kernel& operator<<(const meta_kernel_variable<T> &variable)
  459. {
  460. return *this << variable.name();
  461. }
  462. // define stream operators for literal types
  463. template<class T>
  464. meta_kernel& operator<<(const meta_kernel_literal<T> &literal)
  465. {
  466. return *this << literal.value();
  467. }
  468. meta_kernel& operator<<(const meta_kernel_literal<bool> &literal)
  469. {
  470. return *this << (literal.value() ? "true" : "false");
  471. }
  472. meta_kernel& operator<<(const meta_kernel_literal<char> &literal)
  473. {
  474. const char c = literal.value();
  475. switch(c){
  476. // control characters
  477. case '\0':
  478. return *this << "'\\0'";
  479. case '\a':
  480. return *this << "'\\a'";
  481. case '\b':
  482. return *this << "'\\b'";
  483. case '\t':
  484. return *this << "'\\t'";
  485. case '\n':
  486. return *this << "'\\n'";
  487. case '\v':
  488. return *this << "'\\v'";
  489. case '\f':
  490. return *this << "'\\f'";
  491. case '\r':
  492. return *this << "'\\r'";
  493. // characters which need escaping
  494. case '\"':
  495. case '\'':
  496. case '\?':
  497. case '\\':
  498. return *this << "'\\" << c << "'";
  499. // all other characters
  500. default:
  501. return *this << "'" << c << "'";
  502. }
  503. }
  504. meta_kernel& operator<<(const meta_kernel_literal<signed char> &literal)
  505. {
  506. return *this << lit<char>(literal.value());
  507. }
  508. meta_kernel& operator<<(const meta_kernel_literal<unsigned char> &literal)
  509. {
  510. return *this << uint_(literal.value());
  511. }
  512. // define stream operators for strings
  513. meta_kernel& operator<<(char ch)
  514. {
  515. m_source << ch;
  516. return *this;
  517. }
  518. meta_kernel& operator<<(const char *string)
  519. {
  520. m_source << string;
  521. return *this;
  522. }
  523. meta_kernel& operator<<(const std::string &string)
  524. {
  525. m_source << string;
  526. return *this;
  527. }
  528. template<class T>
  529. static detail::meta_kernel_variable<T> make_var(const std::string &name)
  530. {
  531. return detail::meta_kernel_variable<T>(name);
  532. }
  533. template<class T>
  534. static detail::meta_kernel_literal<T> make_lit(const T &value)
  535. {
  536. return detail::meta_kernel_literal<T>(value);
  537. }
  538. template<class T>
  539. static detail::meta_kernel_variable<T> make_expr(const std::string &expr)
  540. {
  541. return detail::meta_kernel_variable<T>(expr);
  542. }
  543. event exec(command_queue &queue)
  544. {
  545. return exec_1d(queue, 0, 1);
  546. }
  547. event exec_1d(command_queue &queue,
  548. size_t global_work_offset,
  549. size_t global_work_size,
  550. const wait_list &events = wait_list())
  551. {
  552. const context &context = queue.get_context();
  553. ::boost::compute::kernel kernel = compile(context);
  554. return queue.enqueue_1d_range_kernel(
  555. kernel,
  556. global_work_offset,
  557. global_work_size,
  558. 0,
  559. events
  560. );
  561. }
  562. event exec_1d(command_queue &queue,
  563. size_t global_work_offset,
  564. size_t global_work_size,
  565. size_t local_work_size,
  566. const wait_list &events = wait_list())
  567. {
  568. const context &context = queue.get_context();
  569. ::boost::compute::kernel kernel = compile(context);
  570. return queue.enqueue_1d_range_kernel(
  571. kernel,
  572. global_work_offset,
  573. global_work_size,
  574. local_work_size,
  575. events
  576. );
  577. }
  578. template<class T>
  579. std::string get_buffer_identifier(const buffer &buffer,
  580. const memory_object::address_space address_space =
  581. memory_object::global_memory)
  582. {
  583. // check if we've already seen buffer
  584. for(size_t i = 0; i < m_stored_buffers.size(); i++){
  585. const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i];
  586. if(bi.m_mem == buffer.get() &&
  587. bi.address_space == address_space){
  588. return bi.identifier;
  589. }
  590. }
  591. // create a new binding
  592. std::string identifier =
  593. "_buf" + lexical_cast<std::string>(m_stored_buffers.size());
  594. size_t index = add_arg<T *>(address_space, identifier);
  595. // store new buffer info
  596. m_stored_buffers.push_back(
  597. detail::meta_kernel_buffer_info(buffer, identifier, address_space, index));
  598. return identifier;
  599. }
  600. template<class T>
  601. std::string get_svm_identifier(const svm_ptr<T> &svm_ptr,
  602. const memory_object::address_space address_space =
  603. memory_object::global_memory)
  604. {
  605. BOOST_ASSERT(
  606. (address_space == memory_object::global_memory)
  607. || (address_space == memory_object::constant_memory)
  608. );
  609. // check if we've already seen this pointer
  610. for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){
  611. const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i];
  612. if(spi.ptr == svm_ptr.get() &&
  613. spi.address_space == address_space){
  614. return spi.identifier;
  615. }
  616. }
  617. // create a new binding
  618. std::string identifier =
  619. "_svm_ptr" + lexical_cast<std::string>(m_stored_svm_ptrs.size());
  620. size_t index = add_arg<T *>(address_space, identifier);
  621. if(m_stored_svm_ptrs.empty()) {
  622. m_options += std::string(" -cl-std=CL2.0");
  623. }
  624. // store new svm pointer info
  625. m_stored_svm_ptrs.push_back(
  626. detail::meta_kernel_svm_info(
  627. svm_ptr, identifier, address_space, index
  628. )
  629. );
  630. return identifier;
  631. }
  632. std::string get_image_identifier(const char *qualifiers, const image2d &image)
  633. {
  634. size_t index = add_arg_with_qualifiers<image2d>(qualifiers, "image");
  635. set_arg(index, image);
  636. return "image";
  637. }
  638. std::string get_sampler_identifier(bool normalized_coords,
  639. cl_addressing_mode addressing_mode,
  640. cl_filter_mode filter_mode)
  641. {
  642. (void) normalized_coords;
  643. (void) addressing_mode;
  644. (void) filter_mode;
  645. m_pragmas += "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
  646. " CLK_ADDRESS_NONE |\n"
  647. " CLK_FILTER_NEAREST;\n";
  648. return "sampler";
  649. }
  650. template<class Expr>
  651. static std::string expr_to_string(const Expr &expr)
  652. {
  653. meta_kernel tmp((std::string()));
  654. tmp << expr;
  655. return tmp.m_source.str();
  656. }
  657. template<class Predicate>
  658. detail::invoked_function<bool, boost::tuple<Predicate> > if_(Predicate pred) const
  659. {
  660. return detail::invoked_function<bool, boost::tuple<Predicate> >(
  661. "if", std::string(), boost::make_tuple(pred)
  662. );
  663. }
  664. template<class Predicate>
  665. detail::invoked_function<bool, boost::tuple<Predicate> > else_if_(Predicate pred) const
  666. {
  667. return detail::invoked_function<bool, boost::tuple<Predicate> >(
  668. "else if", std::string(), boost::make_tuple(pred)
  669. );
  670. }
  671. detail::meta_kernel_variable<cl_uint> get_global_id(size_t dim) const
  672. {
  673. return expr<cl_uint>("get_global_id(" + lexical_cast<std::string>(dim) + ")");
  674. }
  675. void add_function(const std::string &name, const std::string &source)
  676. {
  677. if(m_external_function_names.count(name)){
  678. return;
  679. }
  680. m_external_function_names.insert(name);
  681. m_external_function_source << source << "\n";
  682. }
  683. void add_function(const std::string &name,
  684. const std::string &source,
  685. const std::map<std::string, std::string> &definitions)
  686. {
  687. typedef std::map<std::string, std::string>::const_iterator iter;
  688. std::stringstream s;
  689. // add #define's
  690. for(iter i = definitions.begin(); i != definitions.end(); i++){
  691. s << "#define " << i->first;
  692. if(!i->second.empty()){
  693. s << " " << i->second;
  694. }
  695. s << "\n";
  696. }
  697. s << source << "\n";
  698. // add #undef's
  699. for(iter i = definitions.begin(); i != definitions.end(); i++){
  700. s << "#undef " << i->first << "\n";
  701. }
  702. add_function(name, s.str());
  703. }
  704. template<class Type>
  705. void add_type_declaration(const std::string &declaration)
  706. {
  707. const char *name = type_name<Type>();
  708. // check if the type has already been declared
  709. std::string source = m_type_declaration_source.str();
  710. if(source.find(name) != std::string::npos){
  711. return;
  712. }
  713. m_type_declaration_source << declaration;
  714. }
  715. template<class Type>
  716. void inject_type() const
  717. {
  718. inject_type_impl<Type>()(const_cast<meta_kernel &>(*this));
  719. }
  720. // the insert_function_call() method inserts a call to a function with
  721. // the given name tuple of argument values.
  722. template<class ArgTuple>
  723. void insert_function_call(const std::string &name, const ArgTuple &args)
  724. {
  725. *this << name << '(';
  726. insert_function_call_args(args);
  727. *this << ')';
  728. }
  729. // the insert_function_call_args() method takes a tuple of argument values
  730. // and inserts them into the source string with a comma in-between each.
  731. // this is useful for creating function calls given a tuple of values.
  732. void insert_function_call_args(const boost::tuple<>&)
  733. {
  734. }
  735. #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE(z, n, unused) \
  736. inject_type<BOOST_PP_CAT(T, n)>();
  737. #define BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG(z, n, unused) \
  738. << boost::get<BOOST_PP_DEC(n)>(args) << ", "
  739. #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS(z, n, unused) \
  740. template<BOOST_PP_ENUM_PARAMS(n, class T)> \
  741. void insert_function_call_args( \
  742. const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &args \
  743. ) \
  744. { \
  745. BOOST_PP_REPEAT_FROM_TO( \
  746. 0, n, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE, ~ \
  747. ) \
  748. *this \
  749. BOOST_PP_REPEAT_FROM_TO( \
  750. 1, n, BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG, ~ \
  751. ) \
  752. << boost::get<BOOST_PP_DEC(n)>(args); \
  753. }
  754. BOOST_PP_REPEAT_FROM_TO(
  755. 1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS, ~
  756. )
  757. #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE
  758. #undef BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG
  759. #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS
  760. static const char* address_space_prefix(const memory_object::address_space value)
  761. {
  762. switch(value){
  763. case memory_object::global_memory: return "__global";
  764. case memory_object::local_memory: return "__local";
  765. case memory_object::private_memory: return "__private";
  766. case memory_object::constant_memory: return "__constant";
  767. };
  768. return 0; // unreachable
  769. }
  770. private:
  771. template<class T>
  772. size_t add_arg_with_qualifiers(const char *qualifiers, const std::string &name)
  773. {
  774. size_t index = add_arg<T>(name);
  775. // update argument type declaration with qualifiers
  776. std::stringstream s;
  777. s << qualifiers << " " << m_args[index];
  778. m_args[index] = s.str();
  779. return index;
  780. }
  781. private:
  782. std::string m_name;
  783. std::stringstream m_source;
  784. std::stringstream m_external_function_source;
  785. std::stringstream m_type_declaration_source;
  786. std::set<std::string> m_external_function_names;
  787. std::vector<std::string> m_args;
  788. std::string m_pragmas;
  789. std::string m_options;
  790. std::vector<detail::meta_kernel_stored_arg> m_stored_args;
  791. std::vector<detail::meta_kernel_buffer_info> m_stored_buffers;
  792. std::vector<detail::meta_kernel_svm_info> m_stored_svm_ptrs;
  793. };
  794. template<class ResultType, class ArgTuple>
  795. inline meta_kernel&
  796. operator<<(meta_kernel &kernel, const invoked_function<ResultType, ArgTuple> &expr)
  797. {
  798. if(!expr.source().empty()){
  799. kernel.add_function(expr.name(), expr.source(), expr.definitions());
  800. }
  801. kernel.insert_function_call(expr.name(), expr.args());
  802. return kernel;
  803. }
  804. template<class ResultType, class ArgTuple, class CaptureTuple>
  805. inline meta_kernel&
  806. operator<<(meta_kernel &kernel,
  807. const invoked_closure<ResultType, ArgTuple, CaptureTuple> &expr)
  808. {
  809. if(!expr.source().empty()){
  810. kernel.add_function(expr.name(), expr.source(), expr.definitions());
  811. }
  812. kernel << expr.name() << '(';
  813. kernel.insert_function_call_args(expr.args());
  814. kernel << ", ";
  815. kernel.insert_function_call_args(expr.capture());
  816. kernel << ')';
  817. return kernel;
  818. }
  819. template<class Arg1, class Arg2, class Result>
  820. inline meta_kernel& operator<<(meta_kernel &kernel,
  821. const invoked_binary_operator<Arg1,
  822. Arg2,
  823. Result> &expr)
  824. {
  825. return kernel << "((" << expr.arg1() << ")"
  826. << expr.op()
  827. << "(" << expr.arg2() << "))";
  828. }
  829. template<class T, class IndexExpr>
  830. inline meta_kernel& operator<<(meta_kernel &kernel,
  831. const detail::device_ptr_index_expr<T, IndexExpr> &expr)
  832. {
  833. if(expr.m_index == 0){
  834. return kernel <<
  835. kernel.get_buffer_identifier<T>(expr.m_buffer) <<
  836. '[' << expr.m_expr << ']';
  837. }
  838. else {
  839. return kernel <<
  840. kernel.get_buffer_identifier<T>(expr.m_buffer) <<
  841. '[' << expr.m_index << "+(" << expr.m_expr << ")]";
  842. }
  843. }
  844. template<class T1, class T2, class IndexExpr>
  845. inline meta_kernel& operator<<(meta_kernel &kernel,
  846. const detail::device_ptr_index_expr<std::pair<T1, T2>, IndexExpr> &expr)
  847. {
  848. typedef std::pair<T1, T2> T;
  849. if(expr.m_index == 0){
  850. return kernel <<
  851. kernel.get_buffer_identifier<T>(expr.m_buffer) <<
  852. '[' << expr.m_expr << ']';
  853. }
  854. else {
  855. return kernel <<
  856. kernel.get_buffer_identifier<T>(expr.m_buffer) <<
  857. '[' << expr.m_index << "+(" << expr.m_expr << ")]";
  858. }
  859. }
  860. // SVM requires OpenCL 2.0
  861. #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
  862. template<class T, class IndexExpr>
  863. inline meta_kernel& operator<<(meta_kernel &kernel,
  864. const svm_ptr_index_expr<T, IndexExpr> &expr)
  865. {
  866. return kernel <<
  867. kernel.get_svm_identifier<T>(expr.m_svm_ptr) <<
  868. '[' << expr.m_expr << ']';
  869. }
  870. #endif
  871. template<class Predicate, class Arg>
  872. inline meta_kernel& operator<<(meta_kernel &kernel,
  873. const invoked_unary_negate_function<Predicate,
  874. Arg> &expr)
  875. {
  876. return kernel << "!(" << expr.pred()(expr.expr()) << ')';
  877. }
  878. template<class Predicate, class Arg1, class Arg2>
  879. inline meta_kernel& operator<<(meta_kernel &kernel,
  880. const invoked_binary_negate_function<Predicate,
  881. Arg1,
  882. Arg2> &expr)
  883. {
  884. return kernel << "!(" << expr.pred()(expr.expr1(), expr.expr2()) << ')';
  885. }
  886. // get<N>() for vector types
  887. template<size_t N, class Arg, class T>
  888. inline meta_kernel& operator<<(meta_kernel &kernel,
  889. const invoked_get<N, Arg, T> &expr)
  890. {
  891. BOOST_STATIC_ASSERT(N < 16);
  892. if(N < 10){
  893. return kernel << expr.m_arg << ".s" << int_(N);
  894. }
  895. else if(N < 16){
  896. #ifdef _MSC_VER
  897. # pragma warning(push)
  898. # pragma warning(disable: 4307)
  899. #endif
  900. return kernel << expr.m_arg << ".s" << char('a' + (N - 10));
  901. #ifdef _MSC_VER
  902. # pragma warning(pop)
  903. #endif
  904. }
  905. return kernel;
  906. }
  907. template<class T, class Arg>
  908. inline meta_kernel& operator<<(meta_kernel &kernel,
  909. const invoked_field<T, Arg> &expr)
  910. {
  911. return kernel << expr.m_arg << "." << expr.m_field;
  912. }
  913. template<class T, class Arg>
  914. inline meta_kernel& operator<<(meta_kernel &k,
  915. const invoked_as<T, Arg> &expr)
  916. {
  917. return k << "as_" << type_name<T>() << "(" << expr.m_arg << ")";
  918. }
  919. template<class T, class Arg>
  920. inline meta_kernel& operator<<(meta_kernel &k,
  921. const invoked_convert<T, Arg> &expr)
  922. {
  923. return k << "convert_" << type_name<T>() << "(" << expr.m_arg << ")";
  924. }
  925. template<class T, class Arg>
  926. inline meta_kernel& operator<<(meta_kernel &k,
  927. const invoked_identity<T, Arg> &expr)
  928. {
  929. return k << expr.m_arg;
  930. }
  931. template<>
  932. struct inject_type_impl<double_>
  933. {
  934. void operator()(meta_kernel &kernel)
  935. {
  936. kernel.add_extension_pragma("cl_khr_fp64", "enable");
  937. }
  938. };
  939. template<class Scalar, size_t N>
  940. struct inject_type_impl<vector_type<Scalar, N> >
  941. {
  942. void operator()(meta_kernel &kernel)
  943. {
  944. kernel.inject_type<Scalar>();
  945. }
  946. };
  947. } // end detail namespace
  948. } // end compute namespace
  949. } // end boost namespace
  950. #endif // BOOST_COMPUTE_DETAIL_META_KERNEL_HPP