diff --git a/include/boost/compute/distributed/command_queue.hpp b/include/boost/compute/distributed/command_queue.hpp new file mode 100644 index 000000000..77acb9191 --- /dev/null +++ b/include/boost/compute/distributed/command_queue.hpp @@ -0,0 +1,256 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DISTRIBUTED_COMMAND_QUEUE_HPP +#define BOOST_COMPUTE_DISTRIBUTED_COMMAND_QUEUE_HPP + +#include + +#include +#include +#include + +namespace boost { +namespace compute { +namespace distributed { + +class command_queue +{ +public: + enum properties { + enable_profiling = CL_QUEUE_PROFILING_ENABLE, + enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + }; + + enum map_flags { + map_read = CL_MAP_READ, + map_write = CL_MAP_WRITE + #ifdef CL_VERSION_1_2 + , + map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION + #endif + }; + + /// Creates a null distributed command queue. + command_queue() + : m_context(), + m_queues() + { + + } + + /// Creates a distributed command queue for all devices in \p context with + /// \p properties. + explicit + command_queue(const ::boost::compute::distributed::context &context, + cl_command_queue_properties properties = 0) + : m_context(context) + { + size_t n = m_context.size(); + for(size_t i = 0; i < n; i++) { + ::boost::compute::context c = m_context.get(i); + std::vector devices = c.get_devices(); + for(size_t j = 0; j < devices.size(); j++) { + m_queues.push_back( + ::boost::compute::command_queue(c, devices[j], properties) + ); + } + } + } + + /// Creates a distributed command queue containing command queues for each + /// corresponding device and context from \p devices and \p contexts. + command_queue(const std::vector< ::boost::compute::context> &contexts, + const std::vector< std::vector > &devices, + cl_command_queue_properties properties = 0) + { + m_context = context(contexts); + for(size_t i = 0; i < m_context.size(); i++) { + for(size_t j = 0; j < devices[i].size(); j++) { + m_queues.push_back( + ::boost::compute::command_queue( + m_context.get(i), devices[i][j], properties + ) + ); + } + } + } + + /// Creates a distributed command queue for all devices in \p context. + command_queue(const ::boost::compute::context &context, + cl_command_queue_properties properties = 0) + { + m_context = ::boost::compute::distributed::context(context); + std::vector devices = context.get_devices(); + for(size_t i = 0; i < devices.size(); i++) { + m_queues.push_back( + ::boost::compute::command_queue( + context, devices[i], properties + ) + ); + } + } + + /// Creates a distributed command queue containing \p queues. + explicit + command_queue(const std::vector< ::boost::compute::command_queue> queues) + : m_queues(queues) + { + std::vector< ::boost::compute::context> contexts; + for(size_t i = 0; i < m_queues.size(); i++) { + contexts.push_back( + m_queues[i].get_context() + ); + } + m_context = context(contexts); + } + + /// Creates a new command queue object as a copy of \p other. + command_queue(const command_queue &other) + : m_context(other.m_context), + m_queues(other.m_queues) + { + + } + + /// Copies the command queue object from \p other to \c *this. + command_queue& operator=(const command_queue &other) + { + if(this != &other){ + m_context = other.m_context; + m_queues = other.m_queues; + } + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new command queue object from \p other. + command_queue(command_queue&& other) BOOST_NOEXCEPT + : m_context(std::move(other.m_context)), + m_queues(std::move(other.m_queues)) + { + + } + + /// Move-assigns the command queue from \p other to \c *this. + command_queue& operator=(command_queue&& other) BOOST_NOEXCEPT + { + m_context = std::move(other.m_context); + m_queues = std::move(other.m_queues); + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Returns the number of individual command queues in this + /// distributed command queue. + size_t size() const + { + return m_queues.size(); + } + + /// Returns the nth command queue. + ::boost::compute::command_queue& get(size_t n) + { + return m_queues[n]; + } + + /// Returns the nth command queue. + const ::boost::compute::command_queue& get(size_t n) const + { + return m_queues[n]; + } + + /// Returns the distributed context used for creating this distributed + /// command queue. + const context& get_context() const + { + return m_context; + } + + /// Returns the context of the nth command queue from distributed + /// command queue. + ::boost::compute::context get_context(size_t n) const + { + return m_queues[n].get_context(); + } + + /// Returns true if all device command queues are in the same OpenCL + /// context. + bool one_context() const + { + return m_context.one_context(); + } + + /// Returns nth context from command queue's distributed context. + ::boost::compute::device get_device(size_t n) const + { + return m_queues[n].get_device(); + } + + /// Returns \c true if the command queue is the same as \p other. + bool operator==(const command_queue &other) const + { + return (m_context == other.m_context) && (m_queues == other.m_queues); + } + + /// Returns \c true if the command queue is different from \p other. + bool operator!=(const command_queue &other) const + { + return (m_context != other.m_context) || (m_queues != other.m_queues); + } + + /// Returns information about nth command queue. + template + T get_info(size_t n, cl_command_queue_info info) const + { + return m_queues[n].get_info(info); + } + + /// Flushes the command queue. + void flush() + { + for(size_t i = 0; i < m_queues.size(); i++) + { + m_queues[i].flush(); + } + } + + /// Blocks until all outstanding commands in the queue have finished. + void finish() + { + for(size_t i = 0; i < m_queues.size(); i++) + { + m_queues[i].finish(); + } + } + + /// \internal_ + /// Return true if every device supports at least OpenCL major.minor + bool check_devices_version(int major, int minor) const + { + bool check = true; + for(size_t i = 0; i < m_queues.size(); i++) + { + check = check && m_queues[i].get_device().check_version(major, minor); + } + return check; + } + +private: + ::boost::compute::distributed::context m_context; + std::vector< ::boost::compute::command_queue> m_queues; +}; + + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + +#endif /* BOOST_COMPUTE_DISTRIBUTED_COMMAND_QUEUE_HPP */ diff --git a/include/boost/compute/distributed/context.hpp b/include/boost/compute/distributed/context.hpp new file mode 100644 index 000000000..41f5a6e80 --- /dev/null +++ b/include/boost/compute/distributed/context.hpp @@ -0,0 +1,226 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DISTRIBUTED_CONTEXT_HPP +#define BOOST_COMPUTE_DISTRIBUTED_CONTEXT_HPP + +#include + +#include +#include + +namespace boost { +namespace compute { +namespace distributed { + +class context +{ +public: + /// Create a null context object. + context() + : m_contexts() + { + } + + /// Creates a new distributed context for \p devices, containing + /// boost::compute::context objects, each constructed using corresponding + /// vector of OpenCL devices and \p properties. + context(const std::vector< std::vector< ::boost::compute::device> > &devices, + const std::vector &properties) + { + m_contexts = std::vector< ::boost::compute::context>(); + for(size_t i = 0; i < devices.size(); i++) { + m_contexts.push_back( + ::boost::compute::context(devices[i], properties[i]) + ); + } + } + + /// Creates a new distributed context for \p devices, containing + /// boost::compute::context objects, each constructed using corresponding + /// vector of OpenCL devices and default properties. + explicit context(const std::vector< std::vector< ::boost::compute::device> > &devices) + { + m_contexts = std::vector< ::boost::compute::context>(); + for(size_t i = 0; i < devices.size(); i++) { + m_contexts.push_back( + ::boost::compute::context(devices[i]) + ); + } + } + + /// Creates a new distributed context for \p devices with \p properties. + context(const std::vector< ::boost::compute::device> &devices, + const std::vector &properties) + { + m_contexts = std::vector< ::boost::compute::context>(); + for(size_t i = 0; i < devices.size(); i++) { + m_contexts.push_back( + ::boost::compute::context(devices[i], properties[i]) + ); + } + } + + /// Creates a new distributed context for \p devices + explicit context(const std::vector< ::boost::compute::device> &devices) + { + m_contexts = std::vector< ::boost::compute::context>(); + for(size_t i = 0; i < devices.size(); i++) { + m_contexts.push_back( + ::boost::compute::context(devices[i]) + ); + } + } + + /// Creates a new distributed context using \p contexts. + explicit context(const std::vector< ::boost::compute::context>& contexts) + : m_contexts(contexts) + { + + } + + /// Creates a new distributed context using contexts from range + /// [\p first, \p last). + template + explicit context(Iterator first, Iterator last) + : m_contexts(first, last) + { + + } + + /// Creates a new distributed context from one \p context. + explicit context(const ::boost::compute::context& context) + : m_contexts(1, context) + { + + } + + /// Creates a new context object as a copy of \p other. + context(const context &other) + : m_contexts(other.m_contexts) + { + + } + + /// Copies the context object from \p other to \c *this. + context& operator=(const context &other) + { + if(this != &other){ + m_contexts = + std::vector< ::boost::compute::context>(other.m_contexts); + } + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new context object from \p other. + context(context&& other) BOOST_NOEXCEPT + : m_contexts(std::move(other.m_contexts)) + { + + } + + /// Move-assigns the context from \p other to \c *this. + context& operator=(context&& other) BOOST_NOEXCEPT + { + m_contexts = std::move(other.m_contexts); + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Returns number of individual contexts in distributed context. + size_t size() const + { + return m_contexts.size(); + } + + /// Returns nth context. + const ::boost::compute::context& get(size_t n) const + { + return m_contexts[n]; + } + + /// Returns true if all contexts are in fact the same OpenCL context. + bool one_context() const + { + bool one_context = m_contexts[0].get() != 0; + for(size_t i = 1; i < m_contexts.size(); i++) { + one_context = (one_context && (m_contexts[0] == m_contexts[i])); + } + return one_context; + } + + /// Returns information about nth context. + template + T get_info(size_t n, cl_context_info info) const + { + return m_contexts[n].get_info(info); + } + + /// Returns the device for the nth context in distributed context. + std::vector get_devices(size_t n) const + { + return m_contexts[n].get_info >(CL_CONTEXT_DEVICES); + } + + /// Returns a vector of devices for the distributed context. + std::vector > get_devices() const + { + std::vector > devices; + for(size_t i = 0; i < m_contexts.size(); i++) { + devices.push_back( + m_contexts[i].get_info >(CL_CONTEXT_DEVICES) + ); + } + return devices; + } + + /// Returns \c true if the context is the same as \p other. + bool operator==(const context &other) const + { + return m_contexts == other.m_contexts; + } + + /// Returns \c true if the context is different from \p other. + bool operator!=(const context &other) const + { + return m_contexts != other.m_contexts; + } + +private: + std::vector< ::boost::compute::context> m_contexts; +}; + + +inline bool operator==(const ::boost::compute::context &lhs, const context& rhs) +{ + return (rhs.size() == 1) && (rhs.get(0) == lhs); +} + +inline bool operator==(const context& lhs, const ::boost::compute::context &rhs) +{ + return (lhs.size() == 1) && (lhs.get(0) == rhs); +} + +inline bool operator!=(const ::boost::compute::context &lhs, const context& rhs) +{ + return !(lhs == rhs); +} + +inline bool operator!=(const context& lhs, const ::boost::compute::context &rhs) +{ + return !(lhs == rhs); +} + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + +#endif /* BOOST_COMPUTE_DISTRIBUTED_CONTEXT_HPP */ diff --git a/include/boost/compute/distributed/copy.hpp b/include/boost/compute/distributed/copy.hpp new file mode 100644 index 000000000..3b12193cb --- /dev/null +++ b/include/boost/compute/distributed/copy.hpp @@ -0,0 +1,525 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DISTRIBUTED_COPY_HPP +#define BOOST_COMPUTE_DISTRIBUTED_COPY_HPP + +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace boost { +namespace compute { +namespace distributed { + +// forward declaration for distributed::vector +template< + class T, + weight_func weight, + class Alloc +> +class vector; + +// host -> distributed::vector +/// Copies the values in the range [\p first, \p last) allocated on host to +/// distributed::vector \p result. The copy is performed asynchronously. +template < + class InputIterator, + class T, weight_func weight, class Alloc +> +inline std::vector +copy_async(InputIterator first, + InputIterator last, + vector &result, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator::value + >::type* = 0) +{ + typedef typename + std::iterator_traits::difference_type diff_type; + + std::vector events; + events.reserve(result.parts()); + + InputIterator part_first = first; + InputIterator part_end = first; + for(size_t i = 0; i < result.parts(); i++) + { + part_end = (std::min)( + part_end + static_cast(result.part_size(i)), + last + ); + event e = + ::boost::compute::copy_async( + part_first, + part_end, + result.begin(i), + queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + part_first = part_end; + } + return events; +} + +// host -> distributed::vector +/// Copies the values in the range [\p first, \p last) allocated on host to +/// distributed::vector \p result. +template < + class InputIterator, + class T, weight_func weight, class Alloc +> +inline void +copy(InputIterator first, + InputIterator last, + vector &result, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator::value + >::type* = 0) +{ + std::vector events = + ::boost::compute::distributed::copy_async(first, last, result, queue); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } +} + +// distributed::vector -> host +/// Copy all values from distributed::vector \p input to the range beginning at +/// \p result allocated on the host. +template < + class T, weight_func weight, class Alloc, + class OutputIterator +> +inline std::vector +copy_async(const vector &input, + OutputIterator result, + command_queue &queue, + typename boost::enable_if< + mpl::and_< + mpl::not_< + is_device_iterator + >, + mpl::not_< + detail::is_distributed_vector + > + > + >::type* = 0) +{ + typedef typename + std::iterator_traits::difference_type diff_type; + + std::vector events; + events.reserve(input.parts()); + + OutputIterator part_result = result; + for(size_t i = 0; i < input.parts(); i++) + { + event e = + ::boost::compute::copy_async( + input.begin(i), + input.end(i), + part_result, + queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + part_result += static_cast(input.part_size(i)); + } + return events; +} + +// distributed::vector -> host +/// Copy all values from distributed::vector \p input to the range beginning at +/// \p result allocated on the host. +template < + class T, weight_func weight, class Alloc, + class OutputIterator +> +inline void +copy(const vector &input, + OutputIterator result, + command_queue &queue, + typename boost::enable_if< + mpl::and_< + mpl::not_< + is_device_iterator + >, + mpl::not_< + detail::is_distributed_vector + > + > + >::type* = 0) +{ + std::vector events = + ::boost::compute::distributed::copy_async(input, result, queue); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } +} + +// device -> distributed::vector +/// Copies the values in the range [\p first, \p last) allocated on an OpenCL +/// device to the distributed::vector \p result. The copy is performed +/// asynchronously. +template < + class InputIterator, + class T, weight_func weight, class Alloc +> +inline std::vector +copy_async(InputIterator first, + InputIterator last, + vector &result, + ::boost::compute::command_queue &device_queue, + command_queue &distributed_queue, + typename boost::enable_if_c< + is_device_iterator::value + >::type* = 0) +{ + typedef typename + std::iterator_traits::difference_type diff_type; + + std::vector events; + events.reserve(result.parts()); + + InputIterator part_first = first; + InputIterator part_end = first; + for(size_t i = 0; i < result.parts(); i++) + { + BOOST_ASSERT_MSG( + distributed_queue.get(i).get_context() == device_queue.get_context(), + "copy_async() is only supported when context of every queue in" + " distributed_queue is the same context as context of device_queue" + ); + part_end = (std::min)( + part_end + static_cast(result.part_size(i)), + last + ); + event e = + ::boost::compute::copy_async( + part_first, + part_end, + result.begin(i), + distributed_queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + part_first = part_end; + } + return events; +} + +// host -> distributed::vector +/// Copies the values in the range [\p first, \p last) allocated on an OpenCL +/// device to the distributed::vector \p result. +template < + class InputIterator, + class T, weight_func weight, class Alloc +> +inline void +copy(InputIterator first, + InputIterator last, + vector &result, + ::boost::compute::command_queue &device_queue, + command_queue &distributed_queue, + typename boost::enable_if_c< + is_device_iterator::value + >::type* = 0) +{ + typedef typename + std::iterator_traits::difference_type diff_type; + + std::vector events; + events.reserve(result.parts()); + + InputIterator part_first = first; + InputIterator part_end = first; + for(size_t i = 0; i < result.parts(); i++) + { + part_end = (std::min)( + part_end + static_cast(result.part_size(i)), + last + ); + if(distributed_queue.get(i).get_context() == device_queue.get_context()) + { + event e = + ::boost::compute::copy_async( + part_first, + part_end, + result.begin(i), + distributed_queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + } + else { + std::vector host(result.part_size(i)); + ::boost::compute::copy( + part_first, + part_end, + host.begin(), + device_queue + ); + ::boost::compute::copy( + host.begin(), + host.end(), + result.begin(i), + distributed_queue.get(i) + ); + } + part_first = part_end; + } + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } +} + +// distributed::vector -> device +/// Copy all values from distributed::vector \p input to the range beginning at +/// \p result allocated on an OpenCL device. +template < + class T, weight_func weight, class Alloc, + class OutputIterator +> +inline std::vector +copy_async(const vector &input, + OutputIterator result, + command_queue &distributed_queue, + ::boost::compute::command_queue &device_queue, + typename boost::enable_if< + mpl::and_< + is_device_iterator, + mpl::not_< + detail::is_distributed_vector + > + > + >::type* = 0) +{ + typedef typename + std::iterator_traits::difference_type diff_type; + + std::vector events; + events.reserve(input.parts()); + + OutputIterator part_result = result; + for(size_t i = 0; i < input.parts(); i++) + { + BOOST_ASSERT_MSG( + distributed_queue.get(i).get_context() == device_queue.get_context(), + "copy_async() is only supported when context of every queue in" + " distributed_queue is the same context as context of device_queue" + ); + event e = + ::boost::compute::copy_async( + input.begin(i), + input.end(i), + part_result, + distributed_queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + part_result += static_cast(input.part_size(i)); + } + return events; +} + +// distributed::vector -> device +/// Copy all values from distributed::vector \p input to the range beginning at +/// \p result allocated on an OpenCL device. +template < + class T, weight_func weight, class Alloc, + class OutputIterator +> +inline void +copy(const vector &input, + OutputIterator result, + command_queue &distributed_queue, + ::boost::compute::command_queue &device_queue, + typename boost::enable_if< + mpl::and_< + is_device_iterator, + mpl::not_< + detail::is_distributed_vector + > + > + >::type* = 0) +{ + typedef typename + std::iterator_traits::difference_type diff_type; + + std::vector events; + events.reserve(input.parts()); + + OutputIterator part_result = result; + for(size_t i = 0; i < input.parts(); i++) + { + if(distributed_queue.get(i).get_context() == device_queue.get_context()) + { + event e = + ::boost::compute::copy_async( + input.begin(i), + input.end(i), + part_result, + distributed_queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + } + else { + std::vector host(input.part_size(i)); + ::boost::compute::copy( + input.begin(i), + input.end(i), + host.begin(), + distributed_queue.get(i) + ); + ::boost::compute::copy( + host.begin(), + host.end(), + part_result, + device_queue + ); + } + part_result += static_cast(input.part_size(i)); + } + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } +} + +// distributed::vector -> distributed::vector +/// Copy distributed::vector \p input into \p output. The copy is performed +/// asynchronously. +/// +/// Both vectors must be able to use \p queue, have the same weight function, +/// the same size and the same number of parts. +template< + class T1, weight_func weight, class Alloc1, + class T2, class Alloc2 +> +inline std::vector +copy_async(const vector &input, + vector &output, + command_queue &queue) +{ + BOOST_ASSERT(input.parts() == output.parts()); + BOOST_ASSERT(input.size() == output.size()); + + std::vector events; + events.reserve(input.parts()); + + for(size_t i = 0; i < input.parts(); i++) + { + event e = + ::boost::compute::copy_async( + input.begin(i), + input.end(i), + output.begin(i), + queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + } + return events; +} + +// distributed::vector -> distributed::vector +/// Copy distributed::vector \p input into \p output. +template< + class T1, weight_func weight1, class Alloc1, + class T2, weight_func weight2, class Alloc2 +> +inline void +copy(const vector &input, + vector &output, + command_queue &input_queue, + command_queue &output_queue) +{ + std::vector events; + events.reserve(input.parts()); + + std::vector host(input.size()); + events = + ::boost::compute::distributed::copy_async( + input, host.begin(), input_queue + ); + // wait for copying from input to host + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + // copy from host to output vector + events = + ::boost::compute::distributed::copy_async( + host.begin(), host.end(), output, output_queue + ); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } +} + +// distributed::vector -> distributed::vector +/// Copy distributed::vector \p input into \p output. +/// +/// Both vectors must be able to use \p queue, have the same weight function, +/// and have the same number of parts. +template< + class T1, weight_func weight, class Alloc1, + class T2, class Alloc2 +> +inline void +copy(const vector &input, + vector &output, + command_queue &queue) +{ + BOOST_ASSERT(input.parts() == output.parts()); + if(input.size() == output.size()) { + std::vector events = + ::boost::compute::distributed::copy_async( + input, output, queue + ); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + } + else { + ::boost::compute::distributed::copy( + input, output, queue, queue + ); + } +} + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + +#endif /* BOOST_COMPUTE_DISTRIBUTED_COPY_HPP */ diff --git a/include/boost/compute/distributed/detail/is_distributed_vector.hpp b/include/boost/compute/distributed/detail/is_distributed_vector.hpp new file mode 100644 index 000000000..57fe5a66d --- /dev/null +++ b/include/boost/compute/distributed/detail/is_distributed_vector.hpp @@ -0,0 +1,35 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DISTRIBUTED_DETAIL_IS_DISTRIBUTED_VECTOR_HPP +#define BOOST_COMPUTE_DISTRIBUTED_DETAIL_IS_DISTRIBUTED_VECTOR_HPP + + +#include + +namespace boost { +namespace compute { +namespace distributed { + +namespace detail { + +template +struct is_distributed_vector : boost::false_type {}; + +template +struct is_distributed_vector : is_distributed_vector {}; + +} // end detail namespace + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + +#endif /* BOOST_COMPUTE_DISTRIBUTED_DETAIL_IS_DISTRIBUTED_VECTOR_HPP */ diff --git a/include/boost/compute/distributed/detail/weight_func.hpp b/include/boost/compute/distributed/detail/weight_func.hpp new file mode 100644 index 000000000..0910137c6 --- /dev/null +++ b/include/boost/compute/distributed/detail/weight_func.hpp @@ -0,0 +1,77 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_WEIGHT_FUNC_HPP +#define BOOST_COMPUTE_DETAIL_WEIGHT_FUNC_HPP + +#include + +#include +#include + +namespace boost { +namespace compute { +namespace distributed { + +typedef std::vector (*weight_func)(const command_queue&); + +std::vector default_weight_func(const command_queue& queue) +{ + return std::vector(queue.size(), 1.0/queue.size()); +} + +namespace detail { + +/// \internal_ +/// Rounds up \p n to the nearest multiple of \p m. +/// Note: \p m must be a multiple of 2. +size_t round_up(size_t n, size_t m) +{ + assert(m && ((m & (m -1)) == 0)); + return (n + m - 1) & ~(m - 1); +} + +std::vector partition(const command_queue& queue, + weight_func weight_func, + const size_t size, + const size_t align) +{ + std::vector weights = weight_func(queue); + std::vector partition; + partition.reserve(queue.size() + 1); + partition.push_back(0); + + if(queue.size() > 1) + { + double acc = 0; + for(size_t i = 0; i < queue.size(); i++) + { + acc += weights[i]; + partition.push_back( + std::min( + size, + round_up(size * acc, align) + ) + ); + } + return partition; + } + partition.push_back(size); + return partition; +} + +} // end distributed detail + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + + +#endif /* INCLUDE_BOOST_COMPUTE_DETAIL_WEIGHT_FUNC_HPP_ */ diff --git a/include/boost/compute/distributed/exclusive_scan.hpp b/include/boost/compute/distributed/exclusive_scan.hpp new file mode 100644 index 000000000..884de3f38 --- /dev/null +++ b/include/boost/compute/distributed/exclusive_scan.hpp @@ -0,0 +1,183 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DISTRIBUTED_EXCLUSIVE_SCAN_HPP +#define BOOST_COMPUTE_DISTRIBUTED_EXCLUSIVE_SCAN_HPP + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace boost { +namespace compute { +namespace distributed { + +template< + class InputType, weight_func weight, class Alloc, + class OutputType, + class BinaryOperator +> +inline void +exclusive_scan(const vector &input, + vector &result, + OutputType init, + BinaryOperator binary_op, + command_queue &queue) +{ + BOOST_ASSERT(input.parts() == result.parts()); + BOOST_ASSERT(input.size() == result.size()); + + std::vector input_tails; + input_tails.reserve(input.parts() - 1); + for(size_t i = 0; i < input.parts(); i++) + { + if(input.begin(i) != input.end(i) && i < (input.parts() - 1)) + { + input_tails.push_back( + static_cast( + (input.end(i) - 1).read(queue.get(i)) + ) + ); + } + + if(i == 0) + { + ::boost::compute::exclusive_scan( + input.begin(i), + input.end(i), + result.begin(i), + init, + binary_op, + queue.get(i) + ); + } + else + { + ::boost::compute::exclusive_scan( + input.begin(i), + input.end(i), + result.begin(i), + input_tails[i - 1], + binary_op, + queue.get(i) + ); + } + } + + // find device for calculating partial sum of last elements of input vector + ::boost::compute::command_queue& device_queue = queue.get(0); + // CPU device is preferred, however if there is none, the first device + // queue is used + for(size_t i = 0; i < queue.size(); i++) + { + if(queue.get(i).get_device().type() & ::boost::compute::device::cpu) + { + device_queue = queue.get(i); + break; + } + } + + std::vector output_tails(input_tails.size()); + for(size_t i = 0; i < input.parts() - 1; i++) + { + if(input.begin(i) != input.end(i)) + { + output_tails[i] = (result.end(i) - 1).read(queue.get(i)); + } + } + ::boost::compute::vector output_tails_device( + output_tails.size(), device_queue.get_context() + ); + ::boost::compute::copy_async( + output_tails.begin(), + output_tails.end(), + output_tails_device.begin(), + device_queue + ); + ::boost::compute::inclusive_scan( + output_tails_device.begin(), + output_tails_device.end(), + output_tails_device.begin(), + device_queue + ); + ::boost::compute::copy( + output_tails_device.begin(), + output_tails_device.end(), + output_tails.begin(), + device_queue + ); + for(size_t i = 1; i < input.parts(); i++) + { + ::boost::compute::transform( + result.begin(i), + result.end(i), + ::boost::compute::make_constant_iterator( + output_tails[i - 1] + ), + result.begin(i), + binary_op, + queue.get(i) + ); + } +} + +/// \overload +template< + class InputType, weight_func weight, class Alloc, + class OutputType +> +inline void +exclusive_scan(const vector &input, + vector &result, + OutputType init, + command_queue &queue) +{ + ::boost::compute::distributed::exclusive_scan( + input, + result, + init, + boost::compute::plus(), + queue + ); +} + +/// \overload +template< + class InputType, weight_func weight, class Alloc, + class OutputType +> +inline void +exclusive_scan(const vector &input, + vector &result, + command_queue &queue) +{ + ::boost::compute::distributed::exclusive_scan( + input, + result, + OutputType(0), + boost::compute::plus(), + queue + ); +} + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + +#endif /* BOOST_COMPUTE_DISTRIBUTED_SCAN_HPP */ diff --git a/include/boost/compute/distributed/reduce.hpp b/include/boost/compute/distributed/reduce.hpp new file mode 100644 index 000000000..cac7d0de1 --- /dev/null +++ b/include/boost/compute/distributed/reduce.hpp @@ -0,0 +1,395 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DISTRIBUTED_REDUCE_HPP +#define BOOST_COMPUTE_DISTRIBUTED_REDUCE_HPP + +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace boost { +namespace compute { +namespace distributed { + +namespace detail { + +template +inline ::boost::compute::command_queue& +final_reduce_queue(OutputIterator result, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator::value + >::type* = 0) +{ + (void) result; + + ::boost::compute::command_queue& device_queue = queue.get(0); + // CPU device is preferred, however if there is none, the first device + // queue is used + for(size_t i = 0; i < queue.size(); i++) + { + if(queue.get(i).get_device().type() & ::boost::compute::device::cpu) + { + device_queue = queue.get(i); + break; + } + } + return device_queue; +} + +template +inline ::boost::compute::command_queue& +final_reduce_queue(OutputIterator result, + command_queue &queue, + typename boost::enable_if_c< + is_device_iterator::value + >::type* = 0) +{ + // first, find all queues that can be used with result iterator + const ::boost::compute::context& result_context = + result.get_buffer().get_context(); + std::vector compatible_queues; + for(size_t i = 0; i < queue.size(); i++) + { + if(queue.get(i).get_context() == result_context) + { + compatible_queues.push_back(i); + } + } + BOOST_ASSERT_MSG( + !compatible_queues.empty(), + "There is no device command queue that can be use to copy to result" + ); + + // then choose device queue from compatible device queues + + // CPU device is preferred, however if there is none, the first + // compatible device queue is used + ::boost::compute::command_queue& device_queue = queue.get(compatible_queues[0]); + for(size_t i = 0; i < compatible_queues.size(); i++) + { + size_t n = compatible_queues[i]; + if(queue.get(n).get_device().type() & ::boost::compute::device::cpu) + { + device_queue = queue.get(n); + break; + } + } + return device_queue; +} + +template< + class InputType, weight_func weight, class Alloc, + class OutputIterator, + class BinaryFunction +> +inline void +dispatch_reduce(const vector &input, + OutputIterator result, + BinaryFunction function, + command_queue &queue) +{ + typedef typename + boost::compute::result_of::type + result_type; + + // find device queue for the final reduction + ::boost::compute::command_queue& device_queue = + final_reduce_queue(result, queue); + + ::boost::compute::buffer parts_results_device( + device_queue.get_context(), input.parts() * sizeof(result_type) + ); + + // if all devices queues are in the same OpenCL context we can + // save part reduction directly into parts_results_device buffer + size_t reduced = 0; + if(queue.one_context()) + { + // reduce each part of input vector + for(size_t i = 0; i < input.parts(); i++) + { + if(input.begin(i) != input.end(i)) + { + // async, because it stores result on device + ::boost::compute::reduce( + input.begin(i), + input.end(i), + ::boost::compute::make_buffer_iterator( + parts_results_device, reduced + ), + function, + queue.get(i) + ); + reduced++; + } + } + // add marker on every queue that is not device_queue, because + // we need to know when reductions are done + wait_list reduce_markers; + reduce_markers.reserve(reduced); + for(size_t i = 0; i < input.parts(); i++) + { + if(input.begin(i) != input.end(i) && queue.get(i) != device_queue) + { + reduce_markers.insert(queue.get(i).enqueue_marker()); + } + } + // if it is possible we enqueue a barrier in device_queue which waits + // for reduce_markers (we can do this since all events are in the same + // context); otherwise, we need to sync. wait for those events + #ifdef CL_VERSION_1_2 + if(device_queue.check_device_version(1, 2)) { + device_queue.enqueue_barrier(reduce_markers); + } + #endif + { + reduce_markers.wait(); + } + } + else + { + // reduce each part of input vector + std::vector parts_results_host(input.parts()); + for(size_t i = 0; i < input.parts(); i++) + { + if(input.begin(i) != input.end(i)) + { + // sync, because it stores result on host + ::boost::compute::reduce( + input.begin(i), + input.end(i), + &parts_results_host[reduced], + function, + queue.get(i) + ); + reduced++; + } + } + // sync, because it copies from host to device + ::boost::compute::copy_n( + parts_results_host.begin(), + reduced, + ::boost::compute::make_buffer_iterator( + parts_results_device + ), + device_queue + ); + } + // final reduction + // async if result is device_iterator, sync otherwise + ::boost::compute::reduce( + ::boost::compute::make_buffer_iterator( + parts_results_device + ), + ::boost::compute::make_buffer_iterator( + parts_results_device, reduced + ), + result, + function, + device_queue + ); +} + +// special case for when OutputIterator is a host iterator +// and binary operator is plus +template< + class InputType, weight_func weight, class Alloc, + class OutputIterator, + class T +> +inline void +dispatch_reduce(const vector &input, + OutputIterator result, + ::boost::compute::plus function, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator::value + >::type* = 0) +{ + // reduce each part of input vector + std::vector parts_results_host(input.parts()); + for(size_t i = 0; i < input.parts(); i++) + { + ::boost::compute::reduce( + input.begin(i), + input.end(i), + &parts_results_host[i], + function, + queue.get(i) + ); + } + + // final reduction + *result = parts_results_host[0]; + for(size_t i = 1; i < input.parts(); i++) + { + *result += static_cast(parts_results_host[i]); + } +} + +// special case for when OutputIterator is a host iterator +// and binary operator is min +template< + class InputType, weight_func weight, class Alloc, + class OutputIterator, + class T +> +inline void +dispatch_reduce(vector &input, + OutputIterator result, + ::boost::compute::min function, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator::value + >::type* = 0) +{ + // reduce each part of input vector + std::vector parts_results_host(input.parts()); + for(size_t i = 0; i < input.parts(); i++) + { + ::boost::compute::reduce( + input.begin(i), + input.end(i), + &parts_results_host[i], + function, + queue.get(i) + ); + } + + // final reduction + *result = parts_results_host[0]; + for(size_t i = 1; i < input.parts(); i++) + { + *result = (std::min)(static_cast(*result), parts_results_host[i]); + } +} + +// special case for when OutputIterator is a host iterator +// and binary operator is max +template< + class InputType, weight_func weight, class Alloc, + class OutputIterator, + class T +> +inline void +dispatch_reduce(const vector &input, + OutputIterator result, + ::boost::compute::max function, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator::value + >::type* = 0) +{ + // reduce each part of input vector + std::vector parts_results_host(input.parts()); + for(size_t i = 0; i < input.parts(); i++) + { + ::boost::compute::reduce( + input.begin(i), + input.end(i), + &parts_results_host[i], + function, + queue.get(i) + ); + } + + // final reduction + *result = parts_results_host[0]; + for(size_t i = 1; i < input.parts(); i++) + { + *result = (std::max)(static_cast(*result), parts_results_host[i]); + } +} + +} // end detail namespace + +/// Returns the result of applying \p function to the elements in the +/// \p input vector. +/// +/// If no function is specified, \c plus will be used. +/// +/// \param input input vector +/// \param result iterator pointing to the output +/// \param function binary reduction function +/// \param queue distributed command queue to perform the operation +/// +/// Distributed command queue \p queue has to span same set of compute devices +/// (including their contexts) as distributed command queue used to create +/// \p input vector. +/// +/// If \p result is a device iterator, its underlying buffer must be allocated +/// in context of at least one device command queue from \p queue. +/// +/// The \c reduce() algorithm assumes that the binary reduction function is +/// associative. When used with non-associative functions the result may +/// be non-deterministic and vary in precision. Notably this affects the +/// \c plus() function as floating-point addition is not associative +/// and may produce slightly different results than a serial algorithm. +/// +/// This algorithm supports both host and device iterators for the +/// result argument. This allows for values to be reduced and copied +/// to the host all with a single function call. +template< + class InputType, weight_func weight, class Alloc, + class OutputIterator, + class BinaryFunction +> +inline void +reduce(const vector &input, + OutputIterator result, + BinaryFunction function, + command_queue &queue) +{ + if(input.empty()) { + return; + } + + if(input.parts() == 1) { + ::boost::compute::reduce( + input.begin(0), + input.end(0), + result, + function, + queue.get(0) + ); + return; + } + detail::dispatch_reduce(input, result, function, queue); +} + +/// \overload +template< + class InputType, weight_func weight, class Alloc, + class OutputIterator +> +inline void +reduce(const vector &input, + OutputIterator result, + command_queue &queue) +{ + return reduce(input, result, ::boost::compute::plus(), queue); +} + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + +#endif /* BOOST_COMPUTE_DISTRIBUTED_REDUCE_HPP */ diff --git a/include/boost/compute/distributed/transform.hpp b/include/boost/compute/distributed/transform.hpp new file mode 100644 index 000000000..55782ff49 --- /dev/null +++ b/include/boost/compute/distributed/transform.hpp @@ -0,0 +1,170 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DISTRIBUTED_TRANSFORM_HPP +#define BOOST_COMPUTE_DISTRIBUTED_TRANSFORM_HPP + +#include +#include +#include + +#include +#include + +namespace boost { +namespace compute { +namespace distributed { + +/// Transforms all the elements from vector \p input using operator \p op +/// and stores the results in \p result. The transform is performed +/// asynchronously and it returns a vector of events, each assisted with +/// transformation of successive parts of \p input. +/// +/// Distributed command queue \p queue has to span same set of compute devices +/// (including their contexts) as distributed command queue used to create +/// \p input and \p output vectors. +/// +/// \see distributed::transform() +template< + class InputType, class OutputType, + weight_func weight, class Alloc, + class UnaryOperator +> +inline std::vector +transform_async(const vector &input, + vector &result, + UnaryOperator op, + command_queue &queue) +{ + BOOST_ASSERT(input.parts() == result.parts()); + BOOST_ASSERT(input.size() == result.size()); + + std::vector events; + events.reserve(input.parts()); + + for(size_t i = 0; i < input.parts(); i++) + { + event e = + ::boost::compute::copy_async( + ::boost::compute::make_transform_iterator(input.begin(i), op), + ::boost::compute::make_transform_iterator(input.end(i), op), + result.begin(i), + queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + } + return events; +} + +/// \overload +template< + class InputType1, class InputType2, class OutputType, + weight_func weight, class Alloc, + class BinaryOperator +> +inline std::vector +transform_async(const vector &input1, + const vector &input2, + vector &result, + BinaryOperator op, + command_queue &queue) +{ + BOOST_ASSERT(input1.parts() == input2.parts()); + BOOST_ASSERT(input1.parts() == result.parts()); + BOOST_ASSERT(input1.size() == input1.size()); + BOOST_ASSERT(input1.size() == result.size()); + + std::vector events; + events.reserve(input1.parts()); + + ::boost::compute::detail::unpacked unpacked_op = + ::boost::compute::detail::unpack(op); + for(size_t i = 0; i < input1.parts(); i++) + { + event e = + ::boost::compute::copy_async( + ::boost::compute::make_transform_iterator( + ::boost::compute::make_zip_iterator( + boost::make_tuple(input1.begin(i), input2.begin(i)) + ), + unpacked_op + ), + ::boost::compute::make_transform_iterator( + ::boost::compute::make_zip_iterator( + boost::make_tuple(input1.end(i), input2.end(i)) + ), + unpacked_op + ), + result.begin(i), + queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + } + return events; +} + +/// Transforms all the elements from vector \p input using operator \p op +/// and stores the results in \p result. +/// +/// Distributed command queue \p queue has to span same set of compute devices +/// (including their contexts) as distributed command queue used to create +/// \p input and \p output vectors. +/// +/// \see distributed::transform_async() +template< + class InputType, weight_func weight, class Alloc, class OutputType, + class UnaryOperator +> +inline void +transform(const vector &input, + vector &output, + UnaryOperator op, + command_queue &queue) +{ + std::vector events = + transform_async( + input, output, op, queue + ); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } +} + +/// \overload +template< + class InputType1, class InputType2, class OutputType, + weight_func weight, class Alloc, + class BinaryOperator +> +inline void +transform(const vector &input1, + const vector &input2, + vector &output, + BinaryOperator op, + command_queue &queue) +{ + std::vector events = + transform_async( + input1, input2, output, op, queue + ); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } +} + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + +#endif /* BOOST_COMPUTE_DISTRIBUTED_TRANSFORM_HPP */ diff --git a/include/boost/compute/distributed/vector.hpp b/include/boost/compute/distributed/vector.hpp new file mode 100644 index 000000000..3ad4c19ca --- /dev/null +++ b/include/boost/compute/distributed/vector.hpp @@ -0,0 +1,597 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DISTRIBUTED_VECTOR_HPP +#define BOOST_COMPUTE_DISTRIBUTED_VECTOR_HPP + +#include +#include +#include +#include +#include + +#include + +#include + +#ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace boost { +namespace compute { +namespace distributed { + +/// \class vector +/// \brief A resizable array of values allocated across multiple devices. +/// +/// The vector class stores a dynamic array of values. Internally, the data +/// is stored in OpenCL buffer objects from multiple OpenCL contexts. +template< + class T, + weight_func weight = default_weight_func, + class Alloc = ::boost::compute::buffer_allocator +> +class vector +{ +public: + typedef T value_type; + typedef Alloc allocator_type; + typedef typename allocator_type::size_type size_type; + typedef typename allocator_type::difference_type difference_type; + typedef ::boost::compute::detail::buffer_value reference; + typedef const ::boost::compute::detail::buffer_value const_reference; + typedef typename allocator_type::pointer pointer; + typedef typename allocator_type::const_pointer const_pointer; + typedef buffer_iterator iterator; + typedef buffer_iterator const_iterator; + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; + + /// Creates an empty distributed vector using \p queue. + explicit vector(const command_queue &queue) + : m_queue(queue), + m_size(0) + { + for(size_t i = 0; i < m_queue.size(); i++) + { + m_allocators.push_back(Alloc(m_queue.get_context(i))); + m_data.push_back( + m_allocators.back() + .allocate(_minimum_capacity()) + ); + m_data_indices.push_back(0); + } + m_data_indices.push_back(0); + } + + /// Creates a distributed vector with space for \p count elements + /// in \p context. + explicit vector(size_t count, const command_queue &queue) + : m_queue(queue), + m_size(count) + { + allocate_memory(count); + } + + /// Creates a distributed vector with space for \p count elements and + /// sets each equal to \p value. + /// + /// For example: + /// \code + /// // creates a vector with four values set to nine (e.g. [9, 9, 9, 9]). + /// boost::compute::distributed::vector vec(4, 9, queue); + /// \endcode + vector(size_type count, + const T &value, + command_queue &queue) + : m_queue(queue), + m_size(count) + { + allocate_memory(m_size); + std::vector events; + events.reserve(m_data.size()); + for(size_t i = 0; i < m_data.size(); i++) + { + event e = + ::boost::compute::fill_async( + begin(i), + end(i), + value, + queue.get(i) + ).get_event(); + if(e.get()) { + events.push_back(e); + } + } + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + } + + /// Creates a vector with space for the values in the range [\p first, + /// \p last) allocated on the host and copies them into the vector + /// with \p queue. + /// + /// For example: + /// \code + /// // values on the host + /// int data[] = { 1, 2, 3, 4 }; + /// + /// // create a vector of size four and copy the values from data + /// boost::compute::distributed::vector vec(data, data + 4, queue); + /// \endcode + template + vector(HostIterator first, + HostIterator last, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator::value + >::type* = 0) + : m_queue(queue), + m_size(::boost::compute::detail::iterator_range_size(first, last)) + { + allocate_memory(m_size); + ::boost::compute::distributed::copy(first, last, *this, m_queue); + } + + /// Creates a vector with space for the values in the range [\p first, + /// \p last) allocated on an OpenCL device and copies them into the vector + /// with \p queue. + template + vector(DeviceIterator first, + DeviceIterator last, + ::boost::compute::command_queue &device_queue, + command_queue &distributed_queue, + typename boost::enable_if_c< + is_device_iterator::value + >::type* = 0) + : m_queue(distributed_queue), + m_size(::boost::compute::detail::iterator_range_size(first, last)) + { + allocate_memory(m_size); + ::boost::compute::distributed::copy( + first, last, *this, device_queue, m_queue + ); + } + + /// Creates a new vector and copies the values from \p other. + explicit vector(const vector &other) + : m_queue(other.m_queue), + m_size(other.m_size) + { + allocate_memory(m_size); + ::boost::compute::distributed::copy( + other, *this, m_queue + ); + } + + /// Creates a new vector and copies the values from \p other + /// with \p queue. + vector(const vector &other, command_queue &queue) + : m_queue(queue), + m_size(other.m_size) + { + allocate_memory(m_size); + if(m_queue == other.m_queue) { + ::boost::compute::distributed::copy( + other, *this, m_queue + ); + } + else { + command_queue other_queue = other.get_queue(); + ::boost::compute::distributed::copy( + other, *this, other_queue, m_queue + ); + } + } + + /// Creates a new vector and copies the values from \p other + /// with \p queue. + template + vector(const vector &other) + : m_queue(other.m_queue), + m_size(other.m_size) + { + allocate_memory(m_size); + ::boost::compute::distributed::copy( + other, *this, m_queue + ); + } + + /// Creates a new vector and copies the values from \p other. + template + vector(const vector &other, + command_queue &queue) + : m_queue(queue), + m_size(other.size()) + { + allocate_memory(m_size); + if(m_queue == other.get_queue()) { + ::boost::compute::distributed::copy( + other, *this, m_queue + ); + } + else { + command_queue other_queue = other.get_queue(); + ::boost::compute::distributed::copy( + other, *this, other_queue, m_queue + ); + } + } + + /// Creates a new vector and copies the values from \p vector. + template + vector(const std::vector &vector, + command_queue &queue) + : m_queue(queue), + m_size(vector.size()) + { + allocate_memory(m_size); + ::boost::compute::distributed::copy( + vector.begin(), vector.end(), *this, m_queue + ); + } + + /// Copy assignment. This operation is always non-blocking. + vector& operator=(const vector &other) + { + if(this != &other){ + m_queue = other.m_queue; + m_size = other.m_size; + allocate_memory(m_size); + ::boost::compute::distributed::copy(other, *this, m_queue); + } + return *this; + } + + /// Copy assignment. This operation is always non-blocking. + template + vector& operator=(const vector &other) + { + m_queue = other.get_queue(); + m_size = other.size(); + allocate_memory(m_size); + ::boost::compute::distributed::copy(other, *this, m_queue); + return *this; + } + + /// Copy assignment. This operation is always non-blocking. + template + vector& operator=(const std::vector &vector) + { + m_size = vector.size(); + allocate_memory(m_size); + ::boost::compute::distributed::copy( + vector.begin(), vector.end(), *this, m_queue + ); + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new vector from \p other. + vector(vector&& other) + : m_queue(std::move(m_queue)), + m_size(other.m_size), + m_data(std::move(other.m_data)), + m_data_indices(std::move(other.m_data_indices)), + m_allocators(std::move(other.m_allocators)) + { + other.m_size = 0; + } + + /// Move-assigns the data from \p other to \c *this. + vector& operator=(vector&& other) + { + if(m_size) { + for(size_t i = 0; i < m_allocators.size(); i++) { + m_allocators[i].deallocate(m_data[i], part_size(i)); + } + } + + m_queue = std::move(other.m_queue); + m_size = other.m_size; + m_data = std::move(other.m_data); + m_data_indices = std::move(other.m_data_indices); + m_allocators = std::move(other.m_allocators); + + other.m_size = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the vector object. + ~vector() + { + if(m_size) { + for(size_t i = 0; i < m_allocators.size(); i++) { + m_allocators[i].deallocate(m_data[i], part_size(i)); + } + } + } + + /// Returns the number of elements in the vector. + size_type size() const + { + return m_size; + } + + size_t parts() const + { + return m_data.size(); + } + + std::vector parts_sizes() const + { + std::vector part_sizes(parts()); + for(size_t i = 0; i < parts(); i++) { + part_sizes[i] = part_size(i); + } + return part_sizes; + } + + size_type part_size(size_t n) const + { + return m_data_indices[n+1] - m_data_indices[n]; + } + + std::vector parts_starts() const + { + return m_data_indices; + } + + size_t part_start(size_t n) const + { + return m_data_indices[n]; + } + + /// Returns \c true if the vector is empty. + bool empty() const + { + return m_size == 0; + } + + iterator begin(size_t n) + { + return ::boost::compute::make_buffer_iterator( + m_data[n].get_buffer(), 0 + ); + } + + const_iterator begin(size_t n) const + { + return ::boost::compute::make_buffer_iterator( + m_data[n].get_buffer(), 0 + ); + } + + const_iterator cbegin(size_t n) const + { + return begin(n); + } + + iterator end(size_t n) + { + return ::boost::compute::make_buffer_iterator( + m_data[n].get_buffer(), m_data_indices[n+1] - m_data_indices[n] + ); + } + + const_iterator end(size_t n) const + { + return ::boost::compute::make_buffer_iterator( + m_data[n].get_buffer(), m_data_indices[n+1] - m_data_indices[n] + ); + } + + const_iterator cend(size_t n) const + { + return end(n); + } + + reverse_iterator rbegin(size_t n) + { + return reverse_iterator(end(n) - 1); + } + + const_reverse_iterator rbegin(size_t n) const + { + return reverse_iterator(end(n) - 1); + } + + const_reverse_iterator crbegin(size_t n) const + { + return rbegin(n); + } + + reverse_iterator rend(size_t n) + { + return reverse_iterator(begin(n) - 1); + } + + const_reverse_iterator rend(size_t n) const + { + return reverse_iterator(begin(n) - 1); + } + + const_reverse_iterator crend(size_t n) const + { + return rend(n); + } + + reference operator[](size_type index) + { + size_t n = + std::upper_bound(m_data_indices.begin(), m_data_indices.end(), index) + - m_data_indices.begin() - 1; + size_t part_index = index - m_data_indices[n]; + return *(begin(n) + static_cast(part_index)); + } + + const_reference operator[](size_type index) const + { + size_t n = + std::upper_bound(m_data_indices.begin(), m_data_indices.end(), index) + - m_data_indices.begin() - 1; + size_t part_index = index - m_data_indices[n]; + return *(begin(n) + static_cast(part_index)); + } + + reference at(size_type index) + { + if(index >= size()){ + BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); + } + + return operator[](index); + } + + const_reference at(size_type index) const + { + if(index >= size()){ + BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); + } + + return operator[](index); + } + + reference front() + { + return *begin(parts() - 1); + } + + const_reference front() const + { + return *begin(parts() - 1); + } + + reference back() + { + return *(end(parts() - 1) - static_cast(1)); + } + + const_reference back() const + { + return *(end(parts() - 1) - static_cast(1)); + } + + /// Swaps the contents of \c *this with \p other. + void swap(vector &other) + { + std::swap(m_data, other.m_data); + std::swap(m_data_indices, other.m_data_indices); + std::swap(m_size, other.m_size); + std::swap(m_allocators, other.m_allocators); + std::swap(m_queue, other.m_queue); + } + + /// Returns the underlying buffer. + std::vector get_buffers() const + { + std::vector buffers; + buffers.reserve(m_data.size()); + for(size_t i = 0; i < m_data.size(); i++) { + buffers.push_back(m_data[i].get_buffer()); + } + return buffers; + } + + /// Returns the underlying buffer for part \p n. + const buffer& get_buffer(size_t n) const + { + return m_data[n].get_buffer(); + } + + command_queue get_queue() const + { + return m_queue; + } + + /// command queue. + const context& get_context() const + { + return m_queue.get_context(); + } + +private: + /// \internal_ + BOOST_CONSTEXPR size_type _align() const { return 16; } + + /// \internal_ + BOOST_CONSTEXPR size_type _minimum_capacity() const { return _align(); } + + /// \internal_ + BOOST_CONSTEXPR float _growth_factor() const { return 1.5; } + + void allocate_memory(size_type count) + { + m_allocators.clear(); + m_data.clear(); + m_data_indices.clear(); + + m_allocators.reserve(m_queue.size()); + m_data.reserve(m_queue.size()); + m_data_indices.reserve(m_queue.size() + 1); + + std::vector partition = + detail::partition(m_queue, weight, count, _align()); + for(size_t i = 0; i < m_queue.size(); i++) + { + size_type data_size = partition[i + 1] - partition[i]; + m_allocators.push_back(Alloc(m_queue.get_context(i))); + m_data.push_back( + m_allocators.back() + .allocate((std::max)(data_size, _minimum_capacity())) + ); + m_data_indices.push_back(partition[i]); + } + m_data_indices.push_back(partition[m_queue.size()]); + } + +private: + command_queue m_queue; + size_type m_size; + + std::vector m_data_indices; + std::vector m_data; + std::vector m_allocators; +}; + + +namespace detail { + +template +struct is_distributed_vector< vector > : boost::true_type {}; + +} // end detail namespace + +} // end distributed namespace +} // end compute namespace +} // end boost namespace + +#endif /* BOOST_COMPUTE_DISTRIBUTED_VECTOR_HPP */ diff --git a/include/boost/compute/utility/wait_list.hpp b/include/boost/compute/utility/wait_list.hpp index 8b81924d6..5b53c5aad 100644 --- a/include/boost/compute/utility/wait_list.hpp +++ b/include/boost/compute/utility/wait_list.hpp @@ -155,6 +155,23 @@ class wait_list insert(future.get_event()); } + /// Inserts \p event into the wait-list. + bool safe_insert(const event &event) + { + if(event.get()) { + m_events.push_back(event); + return true; + } + return false; + } + + /// Inserts the event from \p future into the wait-list. + template + bool safe_insert(const future &future) + { + return safe_insert(future.get_event()); + } + /// Blocks until all of the events in the wait-list have completed. /// /// Does nothing if the wait-list is empty. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 42e3969e1..0fba1a464 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -81,6 +81,14 @@ add_compute_test("core.system" test_system.cpp) add_compute_test("core.type_traits" test_type_traits.cpp) add_compute_test("core.user_event" test_user_event.cpp) +add_compute_test("distributed.context" test_distributed_context.cpp) +add_compute_test("distributed.command_queue" test_distributed_command_queue.cpp) +add_compute_test("distributed.vector" test_distributed_vector.cpp) +add_compute_test("distributed.copy" test_distributed_copy.cpp) +add_compute_test("distributed.reduce" test_distributed_reduce.cpp) +add_compute_test("distributed.transform" test_distributed_transform.cpp) +add_compute_test("distributed.transform" test_distributed_scan.cpp) + add_compute_test("utility.extents" test_extents.cpp) add_compute_test("utility.invoke" test_invoke.cpp) add_compute_test("utility.program_cache" test_program_cache.cpp) diff --git a/test/distributed_check_functions.hpp b/test/distributed_check_functions.hpp new file mode 100644 index 000000000..b1a374f98 --- /dev/null +++ b/test/distributed_check_functions.hpp @@ -0,0 +1,131 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TEST_CHECK_FUNCTIONS_HPP +#define BOOST_COMPUTE_TEST_CHECK_FUNCTIONS_HPP + +#include +#include +#include +#include + +template< + class T1, boost::compute::distributed::weight_func weight, class Alloc1, + class T2, class Alloc2 +> +inline bool +distributed_equal(const boost::compute::distributed::vector &input1, + const boost::compute::distributed::vector &input2, + boost::compute::distributed::command_queue &queue) +{ + if(input1.parts() != input2.parts()) { + return false; + } + if(input1.size() != input2.size()) { + return false; + } + + for(size_t i = 0; i < input1.parts(); i++) + { + if( + !boost::compute::equal( + input1.begin(i), input1.end(i), input2.begin(i), queue.get(i) + ) + ) + { + return false; + } + } + return true; +} + +template< + class T, boost::compute::distributed::weight_func weight, class Alloc +> +inline bool +distributed_equal(const boost::compute::distributed::vector &input, + const T value, + boost::compute::distributed::command_queue &queue) +{ + for(size_t i = 0; i < input.parts(); i++) + { + if( + !boost::compute::equal( + input.begin(i), + input.end(i), + boost::compute::make_constant_iterator(value), + queue.get(i) + ) + ) + { + return false; + } + } + return true; +} + +template< + class T, boost::compute::distributed::weight_func weight, class Alloc +> +inline bool +distributed_equal(const boost::compute::distributed::vector &input, + typename std::vector::iterator first, + typename std::vector::iterator end, + boost::compute::distributed::command_queue &queue) +{ + if(std::distance(first, end) != input.size()) { + return false; + } + + typename std::vector::iterator part_first = first; + typename std::vector::iterator part_end = first; + for(size_t i = 0; i < input.parts(); i++) + { + part_end += input.part_size(i); + boost::compute::vector data(part_first, part_end, queue.get(i)); + if( + !boost::compute::equal( + input.begin(i), + input.end(i), + data.begin(), + queue.get(i) + ) + ) + { + return false; + } + part_first = part_end; + } + return true; +} + +template< + class T1, boost::compute::distributed::weight_func weight1, class Alloc1, + class T2, boost::compute::distributed::weight_func weight2, class Alloc2 +> +inline bool +distributed_equal(const boost::compute::distributed::vector &input1, + const boost::compute::distributed::vector &input2, + boost::compute::distributed::command_queue &queue1, + boost::compute::distributed::command_queue &queue2) +{ + if(input1.parts() != input2.parts()) { + return false; + } + if(input1.size() != input2.size()) { + return false; + } + + std::vector host1(input1.size()); + boost::compute::distributed::copy(input1, host1.begin(), queue1); + return distributed_equal(input2, host1.begin(), host1.end(), queue2); +} + +#endif /* BOOST_COMPUTE_TEST_TEST_CHECK_FUNCTIONS_HPP */ diff --git a/test/distributed_queue_setup.hpp b/test/distributed_queue_setup.hpp new file mode 100644 index 000000000..062ba1698 --- /dev/null +++ b/test/distributed_queue_setup.hpp @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TEST_DISTRIBUTED_QUEUE_SETUP_HPP +#define BOOST_COMPUTE_TEST_DISTRIBUTED_QUEUE_SETUP_HPP + +#include + +inline boost::compute::distributed::command_queue +get_distributed_queue(boost::compute::command_queue& queue, + size_t n = 1, + bool same_context = false) +{ + std::vector queues; + queues.push_back(queue); + for(size_t i = 0; i < n; i++) { + if(same_context) { + queues.push_back( + boost::compute::command_queue( + queue.get_context(), + queue.get_device() + ) + ); + } + else { + queues.push_back( + boost::compute::command_queue( + boost::compute::context(queue.get_device()), + queue.get_device() + ) + ); + } + } + + return boost::compute::distributed::command_queue(queues); +} + +#endif /* BOOST_COMPUTE_TEST_DISTRIBUTED_QUEUE_SETUP_HPP */ diff --git a/test/test_distributed_command_queue.cpp b/test/test_distributed_command_queue.cpp new file mode 100644 index 000000000..d17fc3452 --- /dev/null +++ b/test/test_distributed_command_queue.cpp @@ -0,0 +1,193 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestDistributedCommandQueue +#include + +#include +#include +#include +#include +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +namespace bc = boost::compute; + +BOOST_AUTO_TEST_CASE(construct_from_distributed_context) +{ + std::vector contexts; + contexts.push_back(context); + + bc::distributed::context distributed_context(contexts); + bc::distributed::command_queue distributed_queue1(distributed_context); + BOOST_CHECK_EQUAL( + distributed_queue1.size(), + distributed_context.get_devices(0).size() + ); + + bc::distributed::command_queue distributed_queue2( + distributed_context, bc::distributed::command_queue::enable_profiling + ); + BOOST_CHECK_EQUAL( + distributed_queue2.size(), + distributed_context.get_devices(0).size() + ); + BOOST_CHECK_EQUAL( + distributed_queue2 + .get_info(0, CL_QUEUE_PROPERTIES), + bc::distributed::command_queue::enable_profiling + ); +} + +BOOST_AUTO_TEST_CASE(construct_from_contexts) +{ + std::vector contexts; + std::vector > devices; + + contexts.push_back(context); + devices.push_back(std::vector()); + devices[0].push_back(device); + + bc::distributed::command_queue distributed_queue1(contexts, devices); + BOOST_CHECK_EQUAL( + distributed_queue1.size(), + 1 + ); + + devices[0].push_back(device); + + bc::distributed::command_queue distributed_queue2( + contexts, devices, bc::distributed::command_queue::enable_profiling + ); + BOOST_CHECK_EQUAL( + distributed_queue2.size(), + 2 + ); + BOOST_CHECK_EQUAL( + distributed_queue2 + .get_info(0, CL_QUEUE_PROPERTIES), + bc::distributed::command_queue::enable_profiling + ); +} + +BOOST_AUTO_TEST_CASE(construct_from_context) +{ + bc::distributed::command_queue distributed_queue(context); + BOOST_CHECK_EQUAL( + distributed_queue.size(), + context.get_devices().size() + ); + for(size_t i = 0; i < distributed_queue.size(); i++) { + BOOST_CHECK_EQUAL( + distributed_queue.get_context(i), + context + ); + } +} + +BOOST_AUTO_TEST_CASE(construct_from_command_queues) +{ + std::vector queues; + queues.push_back(queue); + + bc::distributed::command_queue distributed_queue1(queues); + BOOST_CHECK_EQUAL(distributed_queue1.size(), 1); + + queues.push_back(queue); + bc::distributed::command_queue distributed_queue2(queues); + BOOST_CHECK_EQUAL(distributed_queue2.size(), 2); +} + +BOOST_AUTO_TEST_CASE(get_context) +{ + std::vector contexts; + contexts.push_back(context); + contexts.push_back(context); + + bc::distributed::context distributed_context(contexts); + bc::distributed::command_queue distributed_queue(distributed_context); + + BOOST_CHECK( + distributed_queue.get_context() == distributed_context + ); + BOOST_CHECK( + distributed_queue.get_context(0) == context + ); + BOOST_CHECK( + distributed_queue.get_context(1) == context + ); +} + +BOOST_AUTO_TEST_CASE(get_devices) +{ + std::vector contexts; + contexts.push_back(context); + contexts.push_back(context); + + bc::distributed::context distributed_context(contexts); + bc::distributed::command_queue distributed_queue(distributed_context); + + BOOST_CHECK( + distributed_queue.get_device(0) == context.get_device() + ); + BOOST_CHECK( + distributed_queue.get_device(1) == context.get_device() + ); +} + +BOOST_AUTO_TEST_CASE(get_command_queue) +{ + std::vector queues; + queues.push_back(queue); + queues.push_back(queue); + + bc::distributed::command_queue distributed_queue(queues); + + BOOST_CHECK( + distributed_queue.get(0) == queue + ); + BOOST_CHECK( + distributed_queue.get(1) == queue + ); +} + +BOOST_AUTO_TEST_CASE(enqueue_kernel) +{ + std::vector queues; + queues.push_back(queue); + queues.push_back(queue); + + bc::distributed::command_queue distributed_queue(queues); + + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void foo(__global int *output) + { + output[get_global_id(0)] = get_global_id(0); + } + ); + + for(size_t i = 0; i < distributed_queue.size(); i++) + { + bc::command_queue& queue = distributed_queue.get(0); + bc::vector output(8, queue.get_context()); + + bc::kernel kernel = + bc::kernel::create_with_source(source, "foo", queue.get_context()); + + kernel.set_arg(0, output); + queue.enqueue_1d_range_kernel(kernel, 0, output.size(), 0); + CHECK_RANGE_EQUAL(bc::uint_, 8, output, (0, 1, 2, 3, 4, 5, 6, 7)); + } +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/test_distributed_context.cpp b/test/test_distributed_context.cpp new file mode 100644 index 000000000..792d5d361 --- /dev/null +++ b/test/test_distributed_context.cpp @@ -0,0 +1,155 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestDistributedContext +#include + +#include +#include +#include +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +namespace bc = boost::compute; + +BOOST_AUTO_TEST_CASE(construct_from_devices) +{ + std::vector > all_devices; + + const std::vector &platforms = bc::system::platforms(); + for(size_t i = 0; i < platforms.size(); i++){ + const bc::platform &platform = platforms[i]; + + std::vector platform_devices = platform.devices(); + std::vector properties(platform_devices.size(), 0); + + // create a distributed context for devices in current platform + bc::distributed::context ctx1(platform_devices); + bc::distributed::context ctx2(platform_devices, properties); + + // check context count + BOOST_CHECK_EQUAL(ctx1.size(), platform_devices.size()); + BOOST_CHECK_EQUAL(ctx2.size(), platform_devices.size()); + + all_devices.push_back(platform_devices); + } + + // create a distributed context for devices in current platform + std::vector properties(all_devices.size(), 0); + bc::distributed::context ctx(all_devices, properties); + BOOST_CHECK_EQUAL(ctx.size(), all_devices.size()); +} + +BOOST_AUTO_TEST_CASE(construct_from_contexts) +{ + std::vector contexts; + + const std::vector &platforms = bc::system::platforms(); + for(size_t i = 0; i < platforms.size(); i++){ + const bc::platform &platform = platforms[i]; + + // create a context for containing all devices in the platform + bc::context ctx(platform.devices()); + contexts.push_back(ctx); + } + + bc::distributed::context ctx1(contexts); + bc::distributed::context ctx2(contexts.begin(), contexts.end()); + + BOOST_CHECK_EQUAL(ctx1.size(), contexts.size()); + BOOST_CHECK_EQUAL(ctx2.size(), contexts.size()); + for(size_t i = 0; i < contexts.size(); i++) { + BOOST_CHECK_EQUAL(ctx1.get(i), contexts[i]); + BOOST_CHECK_EQUAL(ctx2.get(i), contexts[i]); + } +} + +BOOST_AUTO_TEST_CASE(construct_from_context) +{ + bc::distributed::context ctx(context); + BOOST_CHECK_EQUAL(ctx.size(), 1); + BOOST_CHECK_EQUAL(ctx.get(0), context); +} + +BOOST_AUTO_TEST_CASE(copy_ctor) +{ + std::vector contexts; + contexts.push_back(context); + bc::distributed::context distributed_context1(contexts); + bc::distributed::context distributed_context2(distributed_context1); + BOOST_CHECK( + distributed_context1 == distributed_context2 + ); +} + +BOOST_AUTO_TEST_CASE(assign_operator) +{ + std::vector contexts; + contexts.push_back(context); + bc::distributed::context distributed_context1(contexts); + bc::distributed::context distributed_context2 = distributed_context1; + BOOST_CHECK( + distributed_context1 == distributed_context2 + ); +} + +BOOST_AUTO_TEST_CASE(equality_operator) +{ + std::vector contexts; + contexts.push_back(context); + bc::distributed::context distributed_context1(contexts); + bc::distributed::context distributed_context2(contexts); + + contexts.push_back( + bc::context(queue.get_device()) + ); + bc::distributed::context distributed_context3(contexts); + + BOOST_CHECK(distributed_context1 == distributed_context2); + BOOST_CHECK(distributed_context2 == distributed_context1); + + BOOST_CHECK(distributed_context1 != distributed_context3); + BOOST_CHECK(distributed_context3 != distributed_context1); + + BOOST_CHECK(distributed_context2 != distributed_context3); + BOOST_CHECK(distributed_context3 != distributed_context2); +} + +BOOST_AUTO_TEST_CASE(get_info) +{ + std::vector contexts; + contexts.push_back(context); + bc::distributed::context distributed_context(contexts); + + BOOST_CHECK( + distributed_context.get_info >(0, CL_CONTEXT_DEVICES) + == context.get_info >(CL_CONTEXT_DEVICES) + ); +} + +BOOST_AUTO_TEST_CASE(get_context) +{ + std::vector contexts; + contexts.push_back(context); + contexts.push_back(context); + contexts.push_back(bc::context(context.get_device())); + bc::distributed::context distributed_context(contexts); + + BOOST_CHECK_EQUAL(distributed_context.get(0), context); + BOOST_CHECK_EQUAL(distributed_context.get(1), context); + for(size_t i = 0; i < distributed_context.size(); i++) { + BOOST_CHECK_EQUAL(distributed_context.get(i), contexts[i]); + } +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/test_distributed_copy.cpp b/test/test_distributed_copy.cpp new file mode 100644 index 000000000..a00aaea47 --- /dev/null +++ b/test/test_distributed_copy.cpp @@ -0,0 +1,519 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestDistributedCopy +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +#include "distributed_check_functions.hpp" +#include "distributed_queue_setup.hpp" + +namespace bc = boost::compute; + +BOOST_AUTO_TEST_CASE(copy_from_host) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value = -1; + bc::distributed::vector distributed_vector( + size, value, distributed_queue + ); + distributed_queue.finish(); + + std::vector host(size_t(size), bc::int_(1000)); + host[size - 1] = -10; + host[0] = -20; + host[size/2] = -30; + + // empty copy + bc::distributed::copy( + host.begin(), host.begin(), distributed_vector, distributed_queue + ); + BOOST_CHECK(distributed_equal(distributed_vector, value, distributed_queue)); + + // full copy + bc::distributed::copy( + host.begin(), host.end(), distributed_vector, distributed_queue + ); + BOOST_CHECK( + distributed_equal( + distributed_vector, + host.begin(), host.end(), + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(copy_async_from_host) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value = -1; + bc::distributed::vector distributed_vector( + size, value, distributed_queue + ); + distributed_queue.finish(); + + std::vector host(size_t(size), bc::int_(1000)); + host[size - 1] = -10; + host[0] = -20; + host[size/2] = -30; + + // empty copy + std::vector events = + bc::distributed::copy_async( + host.begin(), host.begin(), distributed_vector, distributed_queue + ); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + BOOST_CHECK(distributed_equal(distributed_vector, value, distributed_queue)); + + // full copy + events = + bc::distributed::copy_async( + host.begin(), host.end(), distributed_vector, distributed_queue + ); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + BOOST_CHECK( + distributed_equal( + distributed_vector, + host.begin(), host.end(), + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(copy_to_host) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value = -1; + bc::distributed::vector distributed_vector( + size, value, distributed_queue + ); + distributed_queue.finish(); + + std::vector host(size, bc::int_(1000)); + bc::distributed::copy( + distributed_vector, host.begin(), distributed_queue + ); + for(size_t i = 0; i < host.size(); i++) { + BOOST_CHECK_EQUAL(host[i], value); + } +} + +BOOST_AUTO_TEST_CASE(copy_async_to_host) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value = -1; + bc::distributed::vector distributed_vector( + size, value, distributed_queue + ); + distributed_queue.finish(); + + std::vector host(size, bc::int_(1000)); + std::vector events = + bc::distributed::copy_async( + distributed_vector, host.begin(), distributed_queue + ); + // wait for copy + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + // check values + for(size_t i = 0; i < host.size(); i++) { + BOOST_CHECK_EQUAL(host[i], value); + } +} + +BOOST_AUTO_TEST_CASE(copy_from_vector_to_vector) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value1 = -1; + bc::int_ value2 = 1; + bc::distributed::vector distributed_vector1( + size, value1, distributed_queue + ); + bc::distributed::vector distributed_vector2( + size, value2, distributed_queue + ); + distributed_queue.finish(); + + BOOST_CHECK(distributed_equal(distributed_vector1, value1, distributed_queue)); + BOOST_CHECK(distributed_equal(distributed_vector2, value2, distributed_queue)); + + bc::distributed::copy( + distributed_vector1, distributed_vector2, distributed_queue + ); + + // check if distributed_vector1 is the same + BOOST_CHECK(distributed_equal(distributed_vector1, value1, distributed_queue)); + // and it was copied into distributed_vector2 + BOOST_CHECK( + distributed_equal( + distributed_vector1, + distributed_vector2, + distributed_queue + ) + ); + + // change distributed_vector1 + distributed_vector1 + .begin(distributed_vector1.parts() - 1) + .write(99, distributed_queue.get(distributed_vector1.parts() - 1)); + distributed_queue.get(distributed_vector1.parts() - 1).finish(); + + // copy once again + bc::distributed::copy( + distributed_vector1, distributed_vector2, distributed_queue + ); + + // check + BOOST_CHECK( + distributed_equal( + distributed_vector1, + distributed_vector2, + distributed_queue + ) + ); + + // copy between vectors of different sizes + bc::int_ value3 = 12; + bc::distributed::vector distributed_vector3( + 2 * size, value3, distributed_queue + ); + + bc::distributed::copy( + distributed_vector1, distributed_vector3, distributed_queue + ); + + // check + std::vector host(distributed_vector3.size(), value3); + bc::distributed::copy( + distributed_vector1, host.begin(), distributed_queue + ); + BOOST_CHECK( + distributed_equal( + distributed_vector3, + host.begin(), host.end(), + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(copy_async_from_vector_to_vector) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value1 = -1; + bc::int_ value2 = 1; + bc::distributed::vector distributed_vector1( + size, value1, distributed_queue + ); + bc::distributed::vector distributed_vector2( + size, value2, distributed_queue + ); + distributed_queue.finish(); + + BOOST_CHECK(distributed_equal(distributed_vector1, value1, distributed_queue)); + BOOST_CHECK(distributed_equal(distributed_vector2, value2, distributed_queue)); + + std::vector events = bc::distributed::copy_async( + distributed_vector1, distributed_vector2, distributed_queue + ); + // wait for copy + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + + // check if distributed_vector1 is the same + BOOST_CHECK(distributed_equal(distributed_vector1, value1, distributed_queue)); + // and it was copied into distributed_vector2 + BOOST_CHECK( + distributed_equal( + distributed_vector1, + distributed_vector2, + distributed_queue + ) + ); + + // change distributed_vector1 + distributed_vector1 + .begin(distributed_vector1.parts() - 1) + .write(99, distributed_queue.get(distributed_vector1.parts() - 1)); + distributed_queue.get(distributed_vector1.parts() - 1).finish(); + + // copy once again + events = bc::distributed::copy_async( + distributed_vector1, distributed_vector2, distributed_queue + ); + // wait for copy + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + + // check + BOOST_CHECK( + distributed_equal( + distributed_vector1, + distributed_vector2, + distributed_queue + ) + ); +} + +std::vector custom_weight_func(const bc::distributed::command_queue& queue) +{ + return std::vector(queue.size(), 1.0/queue.size()); +} + +BOOST_AUTO_TEST_CASE(copy_from_vector_to_vector_different_vectors) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value1 = -1; + bc::long_ value2 = 1; + bc::distributed::vector distributed_vector1( + size, value1, distributed_queue + ); + bc::distributed::vector distributed_vector2( + size, value2, distributed_queue + ); + distributed_queue.finish(); + + BOOST_CHECK(distributed_equal(distributed_vector1, value1, distributed_queue)); + BOOST_CHECK(distributed_equal(distributed_vector2, value2, distributed_queue)); + + bc::distributed::copy( + distributed_vector1, distributed_vector2, distributed_queue, distributed_queue + ); + + // check if distributed_vector1 is the same + BOOST_CHECK(distributed_equal(distributed_vector1, value1, distributed_queue)); + // and it was copied into distributed_vector2 + BOOST_CHECK( + distributed_equal( + distributed_vector1, + distributed_vector2, + distributed_queue, + distributed_queue + ) + ); + + // change distributed_vector1 + distributed_vector1 + .begin(distributed_vector1.parts() - 1) + .write(99, distributed_queue.get(distributed_vector1.parts() - 1)); + distributed_queue.get(distributed_vector1.parts() - 1).finish(); + + // copy once again + bc::distributed::copy( + distributed_vector1, distributed_vector2, distributed_queue, distributed_queue + ); + + // check + BOOST_CHECK( + distributed_equal( + distributed_vector1, + distributed_vector2, + distributed_queue, + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(copy_device_to_vector) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value1 = -1; + bc::long_ value2 = 1; + bc::vector device_vector(size, value1, queue); + bc::distributed::vector distributed_vector( + size, value2, distributed_queue + ); + + bc::distributed::copy( + device_vector.begin(), device_vector.end(), distributed_vector, + queue, distributed_queue + ); + + BOOST_CHECK( + distributed_equal( + distributed_vector, bc::long_(value1), distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(copy_async_device_to_vector) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue, 2, true); + + size_t size = 64; + bc::int_ value1 = -1; + bc::long_ value2 = 1; + bc::vector device_vector(size, value1, queue); + bc::distributed::vector distributed_vector( + size, value2, distributed_queue + ); + + std::vector events = + bc::distributed::copy_async( + device_vector.begin(), device_vector.end(), distributed_vector, + queue, distributed_queue + ); + // wait for copy + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + + BOOST_CHECK( + distributed_equal( + distributed_vector, bc::long_(value1), distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(copy_vector_to_device) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + size_t size = 64; + bc::int_ value1 = -1; + bc::long_ value2 = 1; + bc::vector device_vector(size, value1, queue); + bc::distributed::vector distributed_vector( + size, value2, distributed_queue + ); + + BOOST_CHECK( + distributed_equal( + distributed_vector, value2, distributed_queue + ) + ); + BOOST_CHECK( + boost::compute::equal( + device_vector.begin(), + device_vector.end(), + boost::compute::make_constant_iterator(value1), + queue + ) + ); + + bc::distributed::copy( + distributed_vector, device_vector.begin(), + distributed_queue, queue + ); + + BOOST_CHECK( + boost::compute::equal( + device_vector.begin(), + device_vector.end(), + boost::compute::make_constant_iterator(static_cast(value2)), + queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(copy_async_vector_to_device) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue, 2, true); + + size_t size = 64; + bc::int_ value1 = -1; + bc::long_ value2 = 1; + bc::vector device_vector(size, value1, queue); + bc::distributed::vector distributed_vector( + size, value2, distributed_queue + ); + + BOOST_CHECK( + distributed_equal( + distributed_vector, value2, distributed_queue + ) + ); + BOOST_CHECK( + boost::compute::equal( + device_vector.begin(), + device_vector.end(), + boost::compute::make_constant_iterator(value1), + queue + ) + ); + + std::vector events = + bc::distributed::copy_async( + distributed_vector, device_vector.begin(), + distributed_queue, queue + ); + // wait for copy + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + + BOOST_CHECK( + boost::compute::equal( + device_vector.begin(), + device_vector.end(), + boost::compute::make_constant_iterator(static_cast(value2)), + queue + ) + ); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/test_distributed_reduce.cpp b/test/test_distributed_reduce.cpp new file mode 100644 index 000000000..1f361c867 --- /dev/null +++ b/test/test_distributed_reduce.cpp @@ -0,0 +1,175 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestDistributedReduce +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +#include "distributed_check_functions.hpp" +#include "distributed_queue_setup.hpp" + +namespace bc = boost::compute; + +BOOST_AUTO_TEST_CASE(reduce_int_to_host) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::int_ data[] = { 5, 1, 9, 17, 13 }; + bc::distributed::vector distributed_vector( + data, data + 5, distributed_queue + ); + distributed_queue.finish(); + + bc::int_ sum; + bc::distributed::reduce( + distributed_vector, + &sum, + bc::plus(), + distributed_queue + ); + BOOST_CHECK_EQUAL(sum, 45); + + bc::int_ product; + bc::distributed::reduce( + distributed_vector, + &product, + bc::multiplies(), + distributed_queue + ); + BOOST_CHECK_EQUAL(product, 9945); + + bc::int_ min_value; + bc::distributed::reduce( + distributed_vector, + &min_value, + bc::min(), + distributed_queue + ); + BOOST_CHECK_EQUAL(min_value, 1); + + bc::int_ max_value; + bc::distributed::reduce( + distributed_vector, + &max_value, + bc::max(), + distributed_queue + ); + BOOST_CHECK_EQUAL(max_value, 17); +} + +BOOST_AUTO_TEST_CASE(reduce_int_to_device) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::int_ data[] = { 1, 5, 9, 13, 17 }; + bc::distributed::vector distributed_vector( + data, data + 5, distributed_queue + ); + distributed_queue.finish(); + + bc::vector result1(1, distributed_queue.get_context(0)); + bc::distributed::reduce( + distributed_vector, + result1.begin(), + bc::plus(), + distributed_queue + ); + BOOST_CHECK_EQUAL(result1.begin().read(queue), 45); + + bc::vector result2(1, distributed_queue.get_context(1)); + bc::distributed::reduce( + distributed_vector, + result2.begin(), + bc::multiplies(), + distributed_queue + ); + BOOST_CHECK_EQUAL(result2.begin().read(distributed_queue.get(1)), 9945); +} + +BOOST_AUTO_TEST_CASE(reduce_int_to_device_one_context) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue, 1, true); + + bc::distributed::vector distributed_vector( + size_t(1024), bc::int_(1), distributed_queue + ); + distributed_vector[0] = 2; + distributed_queue.finish(); + + bc::vector result1(1, context); + bc::distributed::reduce( + distributed_vector, + result1.begin(), + bc::plus(), + distributed_queue + ); + BOOST_CHECK_EQUAL(result1.begin().read(queue), 1025); + + bc::vector result2(1, context); + bc::distributed::reduce( + distributed_vector, + result2.begin(), + bc::multiplies(), + distributed_queue + ); + distributed_queue.finish(); + BOOST_CHECK_EQUAL(result2.begin().read(distributed_queue.get(0)), 2); +} + +BOOST_AUTO_TEST_CASE(reduce_int_custom_function) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::distributed::vector distributed_vector( + size_t(34), bc::int_(2), distributed_queue + ); + distributed_queue.finish(); + + BOOST_COMPUTE_FUNCTION(bc::float_, custom_sum, (bc::float_ x, bc::float_ y), + { + return x + y; + }); + + + bc::float_ sum; + bc::distributed::reduce( + distributed_vector, + &sum, + custom_sum, + distributed_queue + ); + BOOST_CHECK_CLOSE(sum, bc::float_(68), 0.01); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/test_distributed_scan.cpp b/test/test_distributed_scan.cpp new file mode 100644 index 000000000..4c6e423f8 --- /dev/null +++ b/test/test_distributed_scan.cpp @@ -0,0 +1,169 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestDistributedScan +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +#include "distributed_check_functions.hpp" +#include "distributed_queue_setup.hpp" + +namespace bc = boost::compute; + +BOOST_AUTO_TEST_CASE(exclusive_scan_int) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue, 4); + + std::vector data(size_t(128)); + for(size_t i = 0; i < data.size(); i++) { + data[i] = i; + } + + bc::distributed::vector distributed_input( + data.begin(), data.end(), distributed_queue + ); + bc::distributed::vector distributed_result( + data.size(), distributed_queue + ); + distributed_queue.finish(); + + BOOST_CHECK( + distributed_equal( + distributed_input, + data.begin(), data.end(), + distributed_queue + ) + ); + + bc::distributed::exclusive_scan( + distributed_input, + distributed_result, + bc::int_(10), + distributed_queue + ); + distributed_queue.finish(); + + bc::vector device_input(data.begin(), data.end(), queue); + bc::vector device_expected(data.size(), context); + std::vector host_expected(device_expected.size()); + bc::exclusive_scan( + device_input.begin(), + device_input.end(), + device_expected.begin(), + bc::int_(10), + queue + ); + bc::copy( + device_expected.begin(), + device_expected.end(), + host_expected.begin(), + queue + ); + queue.finish(); + + BOOST_CHECK( + distributed_equal( + distributed_input, + data.begin(), data.end(), + distributed_queue + ) + ); + BOOST_CHECK( + distributed_equal( + distributed_result, + host_expected.begin(), host_expected.end(), + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(exclusive_scan_custom_function_int) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue, 3); + + BOOST_COMPUTE_FUNCTION(bc::int_, custom_sum, (bc::int_ x, bc::int_ y), + { + return x + y; + }); + + std::vector data(size_t(128)); + for(size_t i = 0; i < data.size(); i++) { + data[i] = i; + } + + bc::distributed::vector distributed_input( + data.begin(), data.end(), distributed_queue + ); + distributed_queue.finish(); + + BOOST_CHECK( + distributed_equal( + distributed_input, + data.begin(), data.end(), + distributed_queue + ) + ); + + bc::distributed::exclusive_scan( + distributed_input, + distributed_input, + bc::int_(10), + custom_sum, + distributed_queue + ); + distributed_queue.finish(); + + bc::vector device_input(data.begin(), data.end(), queue); + bc::vector device_expected(data.size(), context); + std::vector host_expected(device_expected.size()); + bc::exclusive_scan( + device_input.begin(), + device_input.end(), + device_expected.begin(), + bc::int_(10), + queue + ); + bc::copy( + device_expected.begin(), + device_expected.end(), + host_expected.begin(), + queue + ); + queue.finish(); + BOOST_CHECK( + distributed_equal( + distributed_input, + host_expected.begin(), host_expected.end(), + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/test_distributed_transform.cpp b/test/test_distributed_transform.cpp new file mode 100644 index 000000000..a275d4e14 --- /dev/null +++ b/test/test_distributed_transform.cpp @@ -0,0 +1,233 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestDistributedTransform +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +#include "distributed_check_functions.hpp" +#include "distributed_queue_setup.hpp" + +namespace bc = boost::compute; + +BOOST_AUTO_TEST_CASE(transform_async_int_abs) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::int_ data[] = { 1, -2, -3, -4, 5 }; + bc::distributed::vector distributed_vector( + data, data + 5, distributed_queue + ); + distributed_queue.finish(); + + std::vector host(data, data + 5); + BOOST_CHECK( + distributed_equal( + distributed_vector, + host.begin(), host.end(), + distributed_queue + ) + ); + + // transform + std::vector events = + bc::distributed::transform_async( + distributed_vector, + distributed_vector, + bc::abs(), + distributed_queue + ); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + + bc::int_ expected_data[] = { 1, 2, 3, 4, 5 }; + std::vector host_expected(expected_data, expected_data + 5); + BOOST_CHECK( + distributed_equal( + distributed_vector, + host_expected.begin(), host_expected.end(), + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(transform_float_custom_funtion) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::float_ data[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; + bc::distributed::vector distributed_vector( + data, data + 5, distributed_queue + ); + distributed_queue.finish(); + + BOOST_COMPUTE_FUNCTION(float, pow3add4, (float x), + { + return pow(x, 3.0f) + 4.0f; + }); + + // transform + bc::distributed::transform( + distributed_vector, + distributed_vector, + pow3add4, + distributed_queue + ); + distributed_queue.finish(); + + BOOST_CHECK_CLOSE(bc::float_(distributed_vector[0]), 5.0f, 1e-4f); + BOOST_CHECK_CLOSE(bc::float_(distributed_vector[1]), 12.0f, 1e-4f); + BOOST_CHECK_CLOSE(bc::float_(distributed_vector[2]), 31.0f, 1e-4f); + BOOST_CHECK_CLOSE(bc::float_(distributed_vector[3]), 68.0f, 1e-4f); + BOOST_CHECK_CLOSE(bc::float_(distributed_vector[4]), 129.0f, 1e-4f); +} + +BOOST_AUTO_TEST_CASE(transform_async_int_add) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::int_ data1[] = { 1, -2, -3, -4, 5 }; + bc::int_ data2[] = { -1, 2, 3, 4, 0 }; + std::vector host1(data1, data1 + 5); + std::vector host2(data2, data2 + 5); + + bc::distributed::vector distributed_vector1( + data1, data1 + 5, distributed_queue + ); + bc::distributed::vector distributed_vector2( + data2, data2 + 5, distributed_queue + ); + bc::distributed::vector distributed_vector3( + size_t(5), distributed_queue + ); + distributed_queue.finish(); + + // add + std::vector events = + bc::distributed::transform_async( + distributed_vector1, + distributed_vector2, + distributed_vector3, + bc::plus(), + distributed_queue + ); + for(size_t i = 0; i < events.size(); i++) { + events[i].wait(); + } + + BOOST_CHECK( + distributed_equal( + distributed_vector1, + host1.begin(), host1.end(), + distributed_queue + ) + ); + BOOST_CHECK( + distributed_equal( + distributed_vector2, + host2.begin(), host2.end(), + distributed_queue + ) + ); + + bc::int_ expected_data_add[] = { 0, 0, 0, 0, 5 }; + std::vector expected_add( + expected_data_add, expected_data_add + 5 + ); + BOOST_CHECK( + distributed_equal( + distributed_vector3, + expected_add.begin(), expected_add.end(), + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_CASE(transform_int_multiply) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::int_ data1[] = { 1, -2, -3, -4, 5 }; + bc::int_ data2[] = { -1, 2, 3, 4, 0 }; + std::vector host1(data1, data1 + 5); + std::vector host2(data2, data2 + 5); + + bc::distributed::vector distributed_vector1( + data1, data1 + 5, distributed_queue + ); + bc::distributed::vector distributed_vector2( + data2, data2 + 5, distributed_queue + ); + bc::distributed::vector distributed_vector3( + size_t(5), distributed_queue + ); + distributed_queue.finish(); + + // multiply + bc::distributed::transform( + distributed_vector1, + distributed_vector2, + distributed_vector3, + bc::multiplies(), + distributed_queue + ); + + BOOST_CHECK( + distributed_equal( + distributed_vector1, + host1.begin(), host1.end(), + distributed_queue + ) + ); + BOOST_CHECK( + distributed_equal( + distributed_vector2, + host2.begin(), host2.end(), + distributed_queue + ) + ); + + bc::int_ expected_data_multiply[] = { -1, -4, -9, -16, 0 }; + std::vector expected_multiply( + expected_data_multiply, expected_data_multiply + 5 + ); + BOOST_CHECK( + distributed_equal( + distributed_vector3, + expected_multiply.begin(), expected_multiply.end(), + distributed_queue + ) + ); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/test_distributed_vector.cpp b/test/test_distributed_vector.cpp new file mode 100644 index 000000000..6647d1eee --- /dev/null +++ b/test/test_distributed_vector.cpp @@ -0,0 +1,342 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#define BOOST_TEST_MODULE TestDistributedVector +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check_macros.hpp" +#include "context_setup.hpp" + +#include "distributed_check_functions.hpp" +#include "distributed_queue_setup.hpp" + +namespace bc = boost::compute; + +BOOST_AUTO_TEST_CASE(empty_vector) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::distributed::vector distributed_vector(distributed_queue); + + BOOST_CHECK(distributed_vector.empty()); + BOOST_CHECK(distributed_vector.size() == 0); + BOOST_CHECK(distributed_vector.parts() == 2); + + for(size_t i = 0; i < distributed_vector.parts(); i++) + { + BOOST_CHECK(distributed_vector.begin(i) == distributed_vector.end(i)); + BOOST_CHECK(distributed_vector.part_start(i) == 0); + BOOST_CHECK(distributed_vector.part_size(i) == 0); + } +} + +BOOST_AUTO_TEST_CASE(count_ctor) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::distributed::vector distributed_vector( + 64, distributed_queue + ); + + BOOST_CHECK(!distributed_vector.empty()); + BOOST_CHECK(distributed_vector.size() == 64); + BOOST_CHECK(distributed_vector.parts() == distributed_vector.get_queue().size()); + + size_t size_sum = 0; + for(size_t i = 0; i < distributed_vector.parts(); i++) + { + size_sum += distributed_vector.part_size(i); + } + BOOST_CHECK_EQUAL(distributed_vector.size(), size_sum); + + std::vector buffers = distributed_vector.get_buffers(); + for(size_t i = 0; i < distributed_vector.parts(); i++) + { + BOOST_CHECK(buffers[i].get() != 0); + BOOST_CHECK(buffers[i] == distributed_vector.get_buffer(i)); + } +} + +BOOST_AUTO_TEST_CASE(command_queue_ctor) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::uint_ value = 991; + bc::distributed::vector distributed_vector( + size_t(35), value, distributed_queue + ); + + BOOST_CHECK(!distributed_vector.empty()); + BOOST_CHECK(distributed_vector.size() == 35); + BOOST_CHECK(distributed_vector.parts() == distributed_vector.get_queue().size()); + + size_t size_sum = 0; + for(size_t i = 0; i < distributed_vector.parts(); i++) + { + size_sum += distributed_vector.part_size(i); + } + BOOST_CHECK_EQUAL(distributed_vector.size(), size_sum); + + // need to finish since back() and front() + // use different (self-made) queues + distributed_queue.finish(); + BOOST_CHECK_EQUAL(distributed_vector.back(), value); + BOOST_CHECK_EQUAL(distributed_vector.front(), value); + + BOOST_CHECK(distributed_equal(distributed_vector, value, distributed_queue)); +} + +BOOST_AUTO_TEST_CASE(command_queue_ctor_one_queue) +{ + // construct distributed::command_queue + // with only 1 device command queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue, 0); + + bc::uint_ value = 1; + bc::distributed::vector distributed_vector( + size_t(5), value, distributed_queue + ); + + BOOST_CHECK(!distributed_vector.empty()); + BOOST_CHECK(distributed_vector.size() == 5); + BOOST_CHECK(distributed_vector.parts() == 1); + BOOST_CHECK_EQUAL(distributed_vector.size(), distributed_vector.part_size(0)); + + // need to finish since back() and front() + // use different (self-made) queues + distributed_queue.finish(); + BOOST_CHECK_EQUAL(distributed_vector.back(), value); + BOOST_CHECK_EQUAL(distributed_vector.front(), value); + + BOOST_CHECK(distributed_equal(distributed_vector, value, distributed_queue)); +} + +BOOST_AUTO_TEST_CASE(host_iterator_ctor) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::int_ value = -1; + std::vector host_vector(50, value); + + bc::distributed::vector distributed_vector( + host_vector.begin(), host_vector.end(), distributed_queue + ); + + BOOST_CHECK(!distributed_vector.empty()); + BOOST_CHECK(distributed_vector.size() == host_vector.size()); + BOOST_CHECK(distributed_vector.parts() == distributed_vector.get_queue().size()); + + size_t size_sum = 0; + for(size_t i = 0; i < distributed_vector.parts(); i++) + { + size_sum += distributed_vector.part_size(i); + } + BOOST_CHECK_EQUAL(distributed_vector.size(), size_sum); + + BOOST_CHECK(distributed_equal(distributed_vector, value, distributed_queue)); + + // need to finish since back() and front() + // use different (self-made) queues + distributed_queue.finish(); + + distributed_vector.front() = 1; + distributed_vector.back() = 1; + BOOST_CHECK_EQUAL(distributed_vector.back(), 1); + BOOST_CHECK_EQUAL(distributed_vector.front(), 1); +} + +BOOST_AUTO_TEST_CASE(copy_ctor) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue1 = + get_distributed_queue(queue); + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue2 = + get_distributed_queue(queue, 2); + + bc::int_ value = -1; + size_t size = 64; + + bc::distributed::vector distributed_vector( + size, value, distributed_queue1 + ); + bc::distributed::vector distributed_vector_copy1( + distributed_vector + ); + bc::distributed::vector distributed_vector_copy2( + distributed_vector, distributed_queue2 + ); + bc::distributed::vector< + bc::int_, + bc::distributed::default_weight_func, bc::pinned_allocator + > distributed_vector_copy3( + distributed_vector, distributed_queue2 + ); + bc::distributed::vector distributed_vector_copy4( + distributed_vector, distributed_queue1 + ); + bc::distributed::vector< + bc::int_, + bc::distributed::default_weight_func, bc::pinned_allocator + > distributed_vector_copy5( + distributed_vector, distributed_queue1 + ); + + BOOST_CHECK( + distributed_equal(distributed_vector, value, distributed_queue1) + ); + BOOST_CHECK( + distributed_equal(distributed_vector_copy1, value, distributed_queue1) + ); + BOOST_CHECK( + distributed_equal(distributed_vector_copy2, value, distributed_queue2) + ); + BOOST_CHECK( + distributed_equal(distributed_vector_copy3, value, distributed_queue2) + ); + BOOST_CHECK( + distributed_equal(distributed_vector_copy4, value, distributed_queue1) + ); + BOOST_CHECK( + distributed_equal(distributed_vector_copy5, value, distributed_queue1) + ); +} + +BOOST_AUTO_TEST_CASE(at) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::distributed::vector distributed_vector( + size_t(64), bc::uint_(64), distributed_queue + ); + + distributed_vector.begin(1).write(33, distributed_queue.get(1)); + distributed_queue.get(1).finish(); + + BOOST_CHECK_EQUAL( + distributed_vector.at(distributed_vector.part_start(1)), + bc::uint_(33) + ); + BOOST_CHECK_EQUAL( + distributed_vector.at(distributed_vector.part_start(1) + 1), + bc::uint_(64) + ); + BOOST_CHECK_EQUAL( + distributed_vector.at(distributed_vector.part_start(1) - 1), + bc::uint_(64) + ); + + distributed_vector.at(distributed_vector.part_start(1)) = 55; + BOOST_CHECK_EQUAL( + *distributed_vector.begin(1), + bc::uint_(55) + ); + + BOOST_CHECK_THROW(distributed_vector.at(64), std::out_of_range); +} + +BOOST_AUTO_TEST_CASE(subscript_operator) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue = + get_distributed_queue(queue); + + bc::distributed::vector distributed_vector( + size_t(64), bc::uint_(64), distributed_queue + ); + + distributed_vector.begin(1).write(bc::uint_(33), distributed_queue.get(1)); + distributed_queue.get(1).finish(); + + BOOST_CHECK_EQUAL( + distributed_vector[distributed_vector.part_start(1)], + bc::uint_(33) + ); + BOOST_CHECK_EQUAL( + distributed_vector[distributed_vector.part_start(1) + 1], + bc::uint_(64) + ); + BOOST_CHECK_EQUAL( + distributed_vector[distributed_vector.part_start(1) - 1], + bc::uint_(64) + ); + + distributed_vector[distributed_vector.part_start(1)] = bc::uint_(55); + BOOST_CHECK_EQUAL( + *distributed_vector.begin(1), + bc::uint_(55) + ); +} + +BOOST_AUTO_TEST_CASE(swap) +{ + // construct distributed::command_queue + bc::distributed::command_queue distributed_queue1 = + get_distributed_queue(queue); + // construct 2nd distributed::command_queue + bc::distributed::command_queue distributed_queue2 = + get_distributed_queue(queue, 2); + + bc::int_ value1 = -88; + bc::int_ value2 = 99; + size_t size1 = 64; + size_t size2 = 48; + + bc::distributed::vector distributed_vector1( + size1, value1, distributed_queue1 + ); + bc::distributed::vector distributed_vector2( + size2, value2, distributed_queue2 + ); + + BOOST_CHECK_EQUAL(distributed_vector1.size(), size1); + BOOST_CHECK( + distributed_equal(distributed_vector1, value1, distributed_queue1) + ); + BOOST_CHECK_EQUAL(distributed_vector2.size(), size2); + BOOST_CHECK( + distributed_equal(distributed_vector2, value2, distributed_queue2) + ); + distributed_queue1.finish(); + distributed_queue2.finish(); + + distributed_vector1.swap(distributed_vector2); + + BOOST_CHECK_EQUAL(distributed_vector1.size(), size2); + BOOST_CHECK( + distributed_equal(distributed_vector1, value2, distributed_queue2) + ); + BOOST_CHECK_EQUAL(distributed_vector2.size(), size1); + BOOST_CHECK( + distributed_equal(distributed_vector2, value1, distributed_queue1) + ); +} + +BOOST_AUTO_TEST_SUITE_END()