// Boost.uBLAS // // Copyright (c) 2018 Fady Essam // Copyright (c) 2018 Stefan Seefeld // // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt or // copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef boost_numeric_ublas_opencl_misc_hpp_ #define boost_numeric_ublas_opencl_misc_hpp_ #include #include #include namespace boost { namespace numeric { namespace ublas { namespace opencl { template typename std::enable_if::value, T>::type a_sum(ublas::vector const &v, compute::command_queue& queue) { compute::vector scratch_buffer(v.size(), queue.get_context()); compute::vector result_buffer(1, queue.get_context()); cl_event event; if (std::is_same::value) clblasSasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same::value) clblasDasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same>::value) clblasScasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same>::value) clblasDzasum(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event clWaitForEvents(1, &event); return result_buffer[0]; } template typename std::enable_if::value, T>::type a_sum(ublas::vector const &v, compute::command_queue& queue) { ublas::vector vdev(v, queue); return a_sum(vdev, queue); } template typename std::enable_if::value | std::is_same::value, T>::type norm_1(ublas::vector const &v, compute::command_queue& queue) { return a_sum(v, queue); } template typename std::enable_if::value | std::is_same::value, T>::type norm_1(ublas::vector const &v, compute::command_queue& queue) { ublas::vector vdev(v, queue); return norm_1(vdev, queue); } template typename std::enable_if::value, T>::type norm_2(ublas::vector const &v, compute::command_queue& queue) { compute::vector scratch_buffer(2*v.size(), queue.get_context()); compute::vector result_buffer(1, queue.get_context()); cl_event event; if (std::is_same::value) clblasSnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same::value) clblasDnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same>::value) clblasScnrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event else if (std::is_same>::value) clblasDznrm2(v.size(), result_buffer.begin().get_buffer().get(), //result buffer 0, //offset in result buffer v.begin().get_buffer().get(), //input buffer 0, //offset in input buffer 1, //increment in input buffer scratch_buffer.begin().get_buffer().get(), 1, //number of command queues &(queue.get()), //queue 0, // number of events waiting list NULL, //event waiting list &event); //event clWaitForEvents(1, &event); return result_buffer[0]; } template typename std::enable_if::value, T>::type norm_2(ublas::vector const &v, compute::command_queue& queue) { ublas::vector vdev(v, queue); return norm_2(vdev, queue); } }}}} #endif