From e8c0df8a5ef772453580c530a249d55af4600595 Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Mon, 26 Feb 2018 05:47:08 +0100 Subject: [PATCH 01/23] warning -- --- simdpp/dispatch/dispatcher.h | 4 +- simdpp/dispatch/get_arch_string_list.h | 6 +- test/utils/test_results_set.cc | 76 +++++++++++++------------- test/utils/test_results_set.h | 36 ++++++------ 4 files changed, 61 insertions(+), 61 deletions(-) diff --git a/simdpp/dispatch/dispatcher.h b/simdpp/dispatch/dispatcher.h index 03c2f6f2..1c638f3d 100644 --- a/simdpp/dispatch/dispatcher.h +++ b/simdpp/dispatch/dispatcher.h @@ -67,7 +67,7 @@ struct FnVersion { const char* arch_name; }; -inline FnVersion select_version_any(FnVersion* versions, unsigned size, +inline FnVersion select_version_any(FnVersion* versions, std::size_t size, Arch arch) { // No need to try to be very efficient here. @@ -76,7 +76,7 @@ inline FnVersion select_version_any(FnVersion* versions, unsigned size, return lhs.needed_arch > rhs.needed_arch; }); - unsigned i; + std::size_t i; for (i = 0; i < size; ++i) { if (versions[i].fun_ptr == nullptr) continue; diff --git a/simdpp/dispatch/get_arch_string_list.h b/simdpp/dispatch/get_arch_string_list.h index 1907892f..e4872085 100644 --- a/simdpp/dispatch/get_arch_string_list.h +++ b/simdpp/dispatch/get_arch_string_list.h @@ -84,10 +84,10 @@ inline Arch get_arch_string_list(const char* const strings[], int count, const c return res; #endif - int prefixlen = std::strlen(prefix); - for (int i = 0; i < count; ++i) { + auto prefixlen = std::strlen(prefix); + for (auto i = 0; i < count; ++i) { const char* s = *strings++; - int len = std::strlen(s); + auto len = std::strlen(s); // check if s matches prefix if (len < prefixlen) diff --git a/test/utils/test_results_set.cc b/test/utils/test_results_set.cc index 24ef4f9f..5d513e69 100644 --- a/test/utils/test_results_set.cc +++ b/test/utils/test_results_set.cc @@ -24,15 +24,15 @@ TestResultsSet::TestResultsSet(const char* name) : reset_seq(); } 
-TestResultsSet::Result& TestResultsSet::push(ElementType type, unsigned length, - const char* file, unsigned line) +TestResultsSet::Result& TestResultsSet::push(ElementType type, std::size_t length, + const char* file, std::size_t line) { results_.emplace_back(type, length, element_size_for_type(type), file, line, seq_++, curr_precision_ulp_, curr_fp_zero_equal_); return results_.back(); } -unsigned precision_for_result(const TestResultsSet::Result& res) +std::size_t precision_for_result(const TestResultsSet::Result& res) { switch (res.type) { case TYPE_FLOAT32: @@ -48,13 +48,13 @@ template<> struct fix_char_type { using type = int; }; template<> struct fix_char_type { using type = int; }; template -void print_hex(std::ostream& err, unsigned num_elems, unsigned width, +void print_hex(std::ostream& err, std::size_t num_elems, std::size_t width, const T* p) { static_assert(std::is_unsigned::value, "T must be unsigned"); err << "[ " << std::hex << std::setfill('0'); err.precision(width); - for (unsigned i = 0; i < num_elems; i++, p++) { + for (std::size_t i = 0; i < num_elems; i++, p++) { err << std::setw(width*2) << uint64_t(*p); if (i != num_elems - 1) { err << " ; "; @@ -65,12 +65,12 @@ void print_hex(std::ostream& err, unsigned num_elems, unsigned width, } template -void print_numeric(std::ostream& err, unsigned num_elems, unsigned precision, +void print_numeric(std::ostream& err, std::size_t num_elems, std::size_t precision, const T* p) { err << "[ "; err.precision(precision); - for (unsigned i = 0; i < num_elems; i++, p++) { + for (std::size_t i = 0; i < num_elems; i++, p++) { err << typename fix_char_type::type(*p); if (i != num_elems - 1) { err << " ; "; @@ -80,7 +80,7 @@ void print_numeric(std::ostream& err, unsigned num_elems, unsigned precision, err << std::dec; } -void print_vector_hex(std::ostream& out, ElementType type, unsigned num_elems, +void print_vector_hex(std::ostream& out, ElementType type, std::size_t num_elems, const void* data) { switch 
(type) { @@ -118,7 +118,7 @@ void print_vector_hex(std::ostream& out, ElementType type, unsigned num_elems, } void print_vector_numeric(std::ostream& out, ElementType type, - unsigned num_elems, const void* data) + std::size_t num_elems, const void* data) { switch (type) { case TYPE_UINT8: @@ -171,7 +171,7 @@ const char* vector_type_to_str(ElementType type) } } -void print_data_diff(std::ostream& out, ElementType type, unsigned num_elems, +void print_data_diff(std::ostream& out, ElementType type, std::size_t num_elems, const void* data_a, const void* data_b) { out << "type: " << vector_type_to_str(type) @@ -202,7 +202,7 @@ void print_file_info(std::ostream& out, const char* file) out << " In file \"" << file << "\" :\n"; } -void print_file_info(std::ostream& out, const char* file, unsigned line) +void print_file_info(std::ostream& out, const char* file, std::size_t line) { if (file == nullptr) { file = ""; @@ -220,12 +220,12 @@ void print_test_case_name(std::ostream& out, const char* name) out << " In test case \"" << name << "\" :\n"; } -void print_seq_num(std::ostream& out, unsigned num) +void print_seq_num(std::ostream& out, std::size_t num) { out << " Sequence number: " << num << "\n"; } -void print_precision(std::ostream& out, unsigned prec) +void print_precision(std::ostream& out, std::size_t prec) { if (prec > 0) { out << " Precision: " << prec << "ULP\n"; @@ -304,10 +304,10 @@ T nextafter_ulps(T from, T to) // T is either double or float template -bool cmpeq_arrays(const T* a, const T* b, unsigned num_elems, - unsigned prec, bool zero_eq) +bool cmpeq_arrays(const T* a, const T* b, std::size_t num_elems, + std::size_t prec, bool zero_eq) { - for (unsigned i = 0; i < num_elems; i++) { + for (std::size_t i = 0; i < num_elems; i++) { // we need to be extra-precise here. 
nextafter is used because it won't // introduce any rounding errors T ia = *a++; @@ -318,7 +318,7 @@ bool cmpeq_arrays(const T* a, const T* b, unsigned num_elems, if (zero_eq && is_zero_or_neg_zero(ia) && is_zero_or_neg_zero(ib)) { continue; } - for (unsigned i = 0; i < prec; i++) { + for (std::size_t i = 0; i < prec; i++) { ia = nextafter_ulps(ia, ib); } if (std::memcmp(&ia, &ib, sizeof(ia)) != 0) { @@ -345,12 +345,12 @@ const char* get_filename_from_results_set(const TestResultsSet& a, } struct TestSequence { - unsigned begin_index, end_index; + std::size_t begin_index, end_index; const char* begin_file; // For comparisons we want to strip the arch suffix from the file name. // To reduce the number of duplicate computations it is cached here. std::string begin_file_stripped; - unsigned begin_line; + std::size_t begin_line; }; bool is_test_seq_from_same_test(const TestSequence& a, const TestSequence& b) @@ -370,25 +370,25 @@ using TestSequenceList = std::vector; Returns true if test results were skipped, false otherwise. */ -bool skip_results_until_same_test(unsigned& ia, unsigned& ib, +bool skip_results_until_same_test(std::size_t& ia, std::size_t& ib, const TestSequenceList& a, const TestSequenceList& b) { if (is_test_seq_from_same_test(a[ia], b[ib])) return false; - unsigned max_skipped = a.size() - ia + b.size() - ib; + auto max_skipped = a.size() - ia + b.size() - ib; // This problem is solved by brute force as the number of skipped sequences // is very likely small. We evaluate all possible ways to skip sequences // starting with the smallest total number of skipped sequences. 
- for (unsigned num_skipped = 1; num_skipped < max_skipped; ++num_skipped) { + for (auto num_skipped = 1; num_skipped < max_skipped; ++num_skipped) { - for (unsigned i = 0; i <= num_skipped; ++i) { - unsigned skip_from_a = i; - unsigned skip_from_b = num_skipped - i; + for (auto i = 0; i <= num_skipped; ++i) { + auto skip_from_a = i; + auto skip_from_b = num_skipped - i; - unsigned new_ia = ia + skip_from_a; - unsigned new_ib = ib + skip_from_b; + auto new_ia = ia + skip_from_a; + auto new_ib = ib + skip_from_b; if (new_ia < a.size() && new_ib < b.size()) { if (is_test_seq_from_same_test(a[new_ia], b[new_ib])) { @@ -424,12 +424,12 @@ TestSequenceList build_test_sequences(const std::vector& TestSequence next_seq; - unsigned i = 0; + std::size_t i = 0; next_seq.begin_index = i; next_seq.begin_file = results[i].file; next_seq.begin_file_stripped = strip_arch_suffix_from_file(results[i].file); next_seq.begin_line = results[i].line; - unsigned last_seq_num = results[i].seq; + auto last_seq_num = results[i].seq; ++i; @@ -453,7 +453,7 @@ TestSequenceList build_test_sequences(const std::vector& } bool cmpeq_result(const TestResultsSet::Result& ia, const TestResultsSet::Result& ib, - unsigned fp_prec, bool fp_zero_eq) + std::size_t fp_prec, bool fp_zero_eq) { if (std::memcmp(ia.d(), ib.d(), ia.el_size * ia.length) == 0) { return true; @@ -491,8 +491,8 @@ void report_test_comparison(const TestResultsSet& a, const char* a_arch, TestSequenceList b_seqs = build_test_sequences(b.results()); // Compare results - unsigned ia_seq = 0; - unsigned ib_seq = 0; + std::size_t ia_seq = 0u; + std::size_t ib_seq = 0u; while (ia_seq < a_seqs.size() && ib_seq < b_seqs.size()) { if (skip_results_until_same_test(ia_seq, ib_seq, a_seqs, b_seqs)) { @@ -502,8 +502,8 @@ void report_test_comparison(const TestResultsSet& a, const char* a_arch, const auto& a_seq = a_seqs[ia_seq]; const auto& b_seq = b_seqs[ib_seq]; - unsigned a_seq_size = a_seq.end_index - a_seq.begin_index; - unsigned b_seq_size = 
b_seq.end_index - b_seq.begin_index; + std::size_t a_seq_size = a_seq.end_index - a_seq.begin_index; + std::size_t b_seq_size = b_seq.end_index - b_seq.begin_index; if (a_seq_size != b_seq_size) { print_separator(tr.out()); @@ -522,9 +522,9 @@ void report_test_comparison(const TestResultsSet& a, const char* a_arch, return; } - for (unsigned i = 0; i < a_seq_size; ++i) { - unsigned ia = a_seq.begin_index + i; - unsigned ib = b_seq.begin_index + i; + for (auto i = 0; i < a_seq_size; ++i) { + auto ia = a_seq.begin_index + i; + auto ib = b_seq.begin_index + i; const auto& a_res = a.results()[ia]; const auto& b_res = b.results()[ib]; @@ -573,7 +573,7 @@ void report_test_comparison(const TestResultsSet& a, const char* a_arch, return; } - unsigned prec = std::max(precision_for_result(a_res), + std::size_t prec = std::max(precision_for_result(a_res), precision_for_result(b_res)); bool fp_zero_eq = a_res.fp_zero_eq || b_res.fp_zero_eq; diff --git a/test/utils/test_results_set.h b/test/utils/test_results_set.h index e643398d..92b3bd1b 100644 --- a/test/utils/test_results_set.h +++ b/test/utils/test_results_set.h @@ -16,18 +16,18 @@ #include "element_type.h" // Prints two vectors side by side for comparison -void print_data_diff(std::ostream& out, ElementType type, unsigned num_elems, +void print_data_diff(std::ostream& out, ElementType type, std::size_t num_elems, const void* data_a, const void* data_b); void print_separator(std::ostream& out); void print_file_info(std::ostream& out, const char* file); -void print_file_info(std::ostream& out, const char* file, unsigned line); +void print_file_info(std::ostream& out, const char* file, std::size_t line); -void print_vector_hex(std::ostream& out, ElementType type, unsigned num_elems, +void print_vector_hex(std::ostream& out, ElementType type, std::size_t num_elems, const void* data); void print_vector_numeric(std::ostream& out, ElementType type, - unsigned num_elems, const void* data); + std::size_t num_elems, const void* 
data); /** The class represents test results for certain instruction set. We later compare the results with other instruction sets and assume that all @@ -40,11 +40,11 @@ class TestResultsSet { // Holds one result vector struct Result { - static const unsigned num_bytes = 32; + static const std::size_t num_bytes = 32; - Result(ElementType atype, unsigned alength, unsigned ael_size, - const char* afile, unsigned aline, unsigned aseq, - unsigned aprec_ulp, bool afp_zero_eq) + Result(ElementType atype, std::size_t alength, std::size_t ael_size, + const char* afile, std::size_t aline, std::size_t aseq, + std::size_t aprec_ulp, bool afp_zero_eq) { type = atype; file = afile; @@ -58,15 +58,15 @@ class TestResultsSet { } ElementType type; - unsigned line; - unsigned seq; - unsigned prec_ulp; + std::size_t line; + std::size_t seq; + std::size_t prec_ulp; bool fp_zero_eq; const char* file; - unsigned length; - unsigned el_size; + std::size_t length; + std::size_t el_size; - void set(unsigned id, void* adata) + void set(std::size_t id, void* adata) { std::memcpy(data.data() + id*el_size, adata, el_size); } @@ -80,11 +80,11 @@ class TestResultsSet { }; /// Stores the results into the results set. - Result& push(ElementType type, unsigned length, const char* file, unsigned line); + Result& push(ElementType type, std::size_t length, const char* file, std::size_t line); /// Sets the allowed error in ULPs. Only meaningful for floating-point data. 
/// Affects all pushed data until the next call to @a unset_precision - void set_precision(unsigned num_ulp) { curr_precision_ulp_ = num_ulp; } + void set_precision(std::size_t num_ulp) { curr_precision_ulp_ = num_ulp; } void unset_precision() { curr_precision_ulp_ = 0; } /// Sets whether floating-point zero and negative zero are considered @@ -106,8 +106,8 @@ class TestResultsSet { TestResultsSet(const char* name); const char* name_; - unsigned seq_; - unsigned curr_precision_ulp_; + std::size_t seq_; + std::size_t curr_precision_ulp_; bool curr_fp_zero_equal_; std::vector results_; From d70bbc359820baa7026c0e4f6856af7adcd09e1d Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Mon, 26 Feb 2018 05:48:32 +0100 Subject: [PATCH 02/23] fix TestData& operator=(const TestData& other) assignment operator --- test/utils/test_helpers.h | 1 + 1 file changed, 1 insertion(+) diff --git a/test/utils/test_helpers.h b/test/utils/test_helpers.h index e6617964..7078ce9a 100644 --- a/test/utils/test_helpers.h +++ b/test/utils/test_helpers.h @@ -90,6 +90,7 @@ class TestData { TestData& operator=(const TestData& other) { data_ = other.data_; + return (*this); } template From 8d91b7e9d0fe2dfaa76a470d6fdb357ff2f4fe8e Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Mon, 26 Feb 2018 05:50:07 +0100 Subject: [PATCH 03/23] wip issue #107 add transform/reduce algorithm --- doc/wiki/Main_Page.mwiki | 8 +- doc/wiki/algorithm/reduce.mwiki | 38 +++++ doc/wiki/algorithm/transform.mwiki | 56 +++++++ simdpp/algorithm/helper_input_range.h | 56 +++++++ simdpp/algorithm/reduce.h | 123 ++++++++++++++ simdpp/algorithm/transform.h | 172 ++++++++++++++++++++ simdpp/detail/align.h | 19 +++ simdpp/types/traits.h | 220 +++++++++++++++++++------- test/insn/reduce.cc | 134 ++++++++++++++++ test/insn/transform.cc | 170 ++++++++++++++++++++ 10 files changed, 938 insertions(+), 58 deletions(-) create mode 100644 doc/wiki/algorithm/reduce.mwiki create mode 100644 doc/wiki/algorithm/transform.mwiki create 
mode 100644 simdpp/algorithm/helper_input_range.h create mode 100644 simdpp/algorithm/reduce.h create mode 100644 simdpp/algorithm/transform.h create mode 100644 test/insn/reduce.cc create mode 100644 test/insn/transform.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index 6df4ee6c..246b651e 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -20,7 +20,7 @@ various variants of matrix transpositions, interleaving loads/stores, optimized compile-time shuffling instructions, etc. Each of these are implemented in the most efficient manner for the target instruction set. Finally, it's possible to fall back to native intrinsics when necessary, without compromising -maintanability. +maintainability. The library sits somewhere in the middle between programming directly in @@ -173,6 +173,12 @@ the compiler will generate. {{ltt|misc/prefetch_read}}  {{ltt|misc/prefetch_write}} +'''[[Algorithm | STL like algorithm]]''' +
+{{ltt|algorithm/transform}}
+{{ltt|algorithm/reduce}}
+
+ |- class="row rowbottom" | colspan=4| |} diff --git a/doc/wiki/algorithm/reduce.mwiki b/doc/wiki/algorithm/reduce.mwiki new file mode 100644 index 00000000..a916d1e4 --- /dev/null +++ b/doc/wiki/algorithm/reduce.mwiki @@ -0,0 +1,38 @@ +{{simdpp/title|reduce}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | +template<typename T> + T reduce(T const* first, T const* last, T init); +}} +{{dcl | num=2 | +template<typename T, typename BinOp> + T reduce(T const* first, T const* last, T init, T neutral, BinOp f); +}} +{{dcl end}} +{{misc/navbar}} + +1) Computes the sum of the elements in the range [first,last) and the initial value {{tt|init}}. +2) Reduces the range [first,last), possibly permuted and aggregated in an unspecified manner, along with the initial value {{tt|init}}, over the binary operation {{tt|f}} (called binary_op below). + +===Notes=== +The behavior is non-deterministic if binary_op is not associative or not commutative. + +If the range is empty, {{tt|init}} is returned unmodified. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to apply the algorithm to}} +{{par | init | the initial value of the generalized sum}} +{{par | f | binary FunctionObject (binary_op) that will be applied in unspecified order; it is invoked both on scalar {{tt|T}} values and on SIMD vectors of {{tt|T}}}} +{{par | neutral | the neutral (identity) element of {{tt|f}}, e.g. 0 for addition or 1 for multiplication; it is used to initialise the SIMD accumulator}} +{{par end}} + +===Return value=== +Generalized sum of {{tt|init}} and {{tt|*first}}, {{tt|*(first+1)}}, ... 
{{tt|*(last-1)}} over {{tt|binary_op}}, +in other words, {{tt|reduce}} behaves like {{lc|accumulate}} except the elements of the range may be grouped and rearranged in arbitrary order + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc transform }} +{{dsc end}} \ No newline at end of file diff --git a/doc/wiki/algorithm/transform.mwiki b/doc/wiki/algorithm/transform.mwiki new file mode 100644 index 00000000..ba716212 --- /dev/null +++ b/doc/wiki/algorithm/transform.mwiki @@ -0,0 +1,56 @@ +{{simdpp/title|transform}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | +template<typename T, typename U, typename UnOp> + U* transform(T const* first1, T const* last1, U* out, UnOp f); +}} +{{dcl | num=2 | +template<typename T1, typename T2, typename U, typename BinOp> + U* transform(T1 const* first1, T1 const* last1, T2 const* first2, U* out, BinOp f); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|transform}} applies the given function to a range and stores the result in another range, beginning at {{tt|out}}. + +1) The unary operation {{tt|f}} (unary_op) is applied to the range defined by [first1, last1). +2) The binary operation {{tt|f}} (binary_op) is applied to pairs of elements from two ranges: one defined by [first1, last1) and the other beginning at first2. + +===Parameters=== +{{par begin}} +{{par | first1, last1 | the first range of elements to transform}} +{{par | first2 | the beginning of the second range of elements to transform}} +{{par | out | the beginning of the destination range, may be equal to first1 or first2}} +{{par | f (UnOp) | unary operation function object that will be applied.}} +{{par | f (BinOp) | binary operation function object that will be applied.}} +{{par end}} + +===Return value=== +Pointer to the element past the last element written, i.e. {{tt|out}} advanced by the number of elements in [first1, last1). + +===Notes=== +Notes +{{tt|transform}} does not guarantee in-order application of unary_op or binary_op. 
+To apply a function to a sequence in-order or to apply a function that modifies the elements of a sequence, use {{tt|for_each}}. + +===Equivalent operation=== +{{source|1= +while (first1 != last1) { + *out++ = f(*first1++); + } + return out; +}} + +{{source|2= +while (first1 != last1) { + *out++ = f(*first1++, *first2++); + } + return out; +}} + +===See also=== +{{dsc begin}} +{{dsc inc | misc/dsc for_each }} +{{dsc inc | algorithm/dsc reduce }} +{{dsc end}} diff --git a/simdpp/algorithm/helper_input_range.h b/simdpp/algorithm/helper_input_range.h new file mode 100644 index 00000000..b4c81468 --- /dev/null +++ b/simdpp/algorithm/helper_input_range.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_HELPER_INPUT_RANGE_H +#define LIBSIMDPP_SIMDPP_HELPER_INPUT_RANGE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + /** + Extracts, from the contiguous range [first,last[, + the two loop counters: + - the scalar prologue [start,size_prologue_loop[, i.e. the range between the original begin and the first + location that is properly aligned to be used through simd operators + - the main simd loop part [size_prologue_loop,size_simd_loop[, i.e. the range where we can apply simd operators + - Note: the epilogue equals [size_simd_loop,stop[ + */ + template + const std::pair helper_input_range(const T* first, const T* last) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("helper_input_range - null ptr first."); + if (!last) + throw std::runtime_error("helper_input_range - null ptr last."); +#endif + using simd_type_T = typetraits::simd_type; + + const auto simd_size = simd_type_T::base_length; + const auto alignment = 
typetraits::alignment; + + const auto size = std::distance(first, last); + //get first aligned adress from first + const T* ptr_aligned_first =(T*)detail::reach_next_aligned((void*)first, alignment); + // Next aligned address may be out of range, so make sure size_prologue_loop is not bigger than size + const auto size_prologue_loop = std::min(size,std::distance(first, ptr_aligned_first)); + const auto size_simd_loop = (size >= size_prologue_loop) ? (simd_size * ((size- size_prologue_loop) / simd_size)) : (0u); + + return std::make_pair(size_prologue_loop, size_simd_loop); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_HELPER_INPUT_RANGE_H diff --git a/simdpp/algorithm/reduce.h b/simdpp/algorithm/reduce.h new file mode 100644 index 00000000..19978957 --- /dev/null +++ b/simdpp/algorithm/reduce.h @@ -0,0 +1,123 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_REDUCE_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_REDUCE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + T reduce(T const* first, T const* last, T init) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("reduce - null ptr first."); + if (!last) + throw std::runtime_error("reduce - null ptr last."); +#endif + using simd_type_T = typetraits::simd_type; + const auto alignment = typetraits::alignment; + + simd_type_T accusimd = splat((T)0); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = 
helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + init += *first++; + } + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + accusimd = accusimd + element; //TODO need += + first += simd_size; + } + + + //---epilogue + for (; i < size; ++i) + { + init += *first++; + } + + //sum simd residual + init += reduce_add(accusimd); + return init; + } + + template + T reduce(T const* first, T const* last, T init, T neutral, BinOp f) //need neutral element for simd part + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("reduce - null ptr first."); + if (!last) + throw std::runtime_error("reduce - null ptr last."); +#endif + using simd_type_T = typetraits::simd_type; + const auto alignment = typetraits::alignment; + + //Define loop counter + const auto size = std::distance(first, last); + const auto simd_size = simd_type_T::base_length; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + simd_type_T accusimd = splat(T(neutral)); //think about product sum + + //---prologue + for (; i < size_prologue_loop; ++i) + { + init = f(init, *first++); + } + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + accusimd = f(accusimd, element); + first += simd_size; + } + //---epilogue + for (; i < size; ++i) + { + init = f(init, *first++); + } + + //reduce simd residual + for_each(accusimd, [&](T el) { init = f(init, el); }); + return init; + } + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_REDUCE_H + diff --git a/simdpp/algorithm/transform.h 
b/simdpp/algorithm/transform.h new file mode 100644 index 00000000..54d3a7e6 --- /dev/null +++ b/simdpp/algorithm/transform.h @@ -0,0 +1,172 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + U* transform(T const* first, T const* last, U* out, UnOp f) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("transform - null ptr first."); + if (!last) + throw std::runtime_error("transform - null ptr last."); + if (!out) + throw std::runtime_error("transform - null ptr out."); +#endif + using simd_type_T = typetraits::simd_type; + using simd_type_U = typetraits::simd_type; + + static_assert (simd_type_T::base_length == simd_type_U::base_length + , "mismatch base_length between T and U" + ); + + const auto alignment = typetraits::alignment; + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + + auto i = 0u; + + //---prologue + for (; i < size_prologue_loop; ++i) + { + *out++ = f(*first++); + } + //---main simd loop + if (detail::is_aligned(out, alignment)) //TODO reach the first aligned adress + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + store(out, f(element)); + first += simd_size; + out += simd_size; + } + } + else + 
{ + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + store_u(out, f(element)); + first += simd_size; + out += simd_size; + } + } + //---epilogue + for (; i < size; ++i) + { + *out++ = f(*first++); + } + return out; + } + template + U* transform(T1 const* first1, T1 const* last1, T2 const* first2, U* out, BinOp f) + { +#ifndef NDEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("transform - null ptr first1."); + if (!last1) + throw std::runtime_error("transform - null ptr last1."); + if (!first2) + throw std::runtime_error("transform - null ptr first2."); + if (!out) + throw std::runtime_error("transform - null ptr out."); +#endif + using simd_type_T1 = typetraits::simd_type; + using simd_type_T2 = typetraits::simd_type; + using simd_type_U = typetraits::simd_type; + + static_assert (simd_type_T1::base_length == simd_type_T2::base_length + , "mismatch base_length between T1 and T2" + ); + static_assert (simd_type_T1::base_length == simd_type_U::base_length + , "mismatch base_length between T1 and U" + ); + static_assert (simd_type_T2::base_length == simd_type_U::base_length + , "mismatch base_length between T2 and U" + ); + + auto alignment = typetraits::alignment; + + //Define loop counter + const auto simd_size = simd_type_T1::base_length; + const auto size = std::distance(first1, last1); + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + + auto i = 0u; + + //---prologue + for (; i < size_prologue_loop; ++i) + { + *out++ = f(*first1++, *first2++); + } + + //---main simd loop + if (detail::is_aligned(first1, alignment) && detail::is_aligned(first2, alignment) && detail::is_aligned(out, alignment))//TODO reach the first aligned adress + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T1 element1 = load(first1); + simd_type_T2 element2 = load(first2); + store(out, f(element1, element2)); + 
first1 += simd_size; + first2 += simd_size; + out += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T1 element1 = load_u(first1); + simd_type_T2 element2 = load_u(first2); + store_u(out, f(element1, element2)); + first1 += simd_size; + first2 += simd_size; + out += simd_size; + } + } + //---epilogue + for (; i < size; ++i) + { + *out++ = f(*first1++, *first2++); + } + return out; + } + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_H + diff --git a/simdpp/detail/align.h b/simdpp/detail/align.h index a90a5579..504dfa1c 100644 --- a/simdpp/detail/align.h +++ b/simdpp/detail/align.h @@ -13,6 +13,7 @@ #endif #include +#include namespace simdpp { namespace SIMDPP_ARCH_NAMESPACE { @@ -42,6 +43,24 @@ const T* assume_aligned(const T* x, unsigned bytes) #endif } +SIMDPP_INL bool is_aligned(const void* ptr, std::size_t A) noexcept +{ + assert(((A & (A - 1)) == 0)); + return ((std::size_t)ptr & (A - 1)) == 0; //from boost\align\detail\is_aligned.hpp +} + +SIMDPP_INL bool is_aligned(std::size_t val, std::size_t A) noexcept +{ + assert(((A & (A - 1)) == 0)); + return (val & (A - 1)) == 0; //from boost\align\detail\is_aligned.hpp +} + +SIMDPP_INL void* reach_next_aligned(void* ptr, std::size_t A) noexcept +{ + assert(((A & (A - 1)) == 0)); + return (void*)(((std::size_t)ptr + A - 1) &~(A - 1)); //from boost\align\detail\align_up.hpp +} + } // namespace detail } // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp diff --git a/simdpp/types/traits.h b/simdpp/types/traits.h index a1302e26..a0507995 100644 --- a/simdpp/types/traits.h +++ b/simdpp/types/traits.h @@ -9,68 +9,174 @@ #define LIBSIMDPP_SIMDPP_TYPES_TRAITS_H #ifndef LIBSIMDPP_SIMD_H - #error "This file must be included through simd.h" +#error "This file must be included through simd.h" #endif #include #include + namespace simdpp { -namespace SIMDPP_ARCH_NAMESPACE { - -/// Allows detection whether specific type is 
a simdpp vector -template -struct is_vector : std::false_type {}; - -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; -template struct is_vector> : std::true_type {}; - -/// Allows detection whether specific type is a simdpp value (i.e. not expression) vector -template -struct is_value_vector : std::false_type {}; - -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; -template struct is_value_vector> : std::true_type {}; - -/// Allows detection whether specific type is a simdpp mask -template -struct is_mask : std::false_type {}; - 
-template struct is_mask> : std::true_type {}; -template struct is_mask> : std::true_type {}; -template struct is_mask> : std::true_type {}; -template struct is_mask> : std::true_type {}; -template struct is_mask> : std::true_type {}; -template struct is_mask> : std::true_type {}; - -} // namespace SIMDPP_ARCH_NAMESPACE + namespace SIMDPP_ARCH_NAMESPACE { + + /// Allows detection whether specific type is a simdpp vector + template + struct is_vector : std::false_type {}; + + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + template struct is_vector> : std::true_type {}; + + /// Allows detection whether specific type is a simdpp value (i.e. 
not expression) vector + template + struct is_value_vector : std::false_type {}; + + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + template struct is_value_vector> : std::true_type {}; + + /// Allows detection whether specific type is a simdpp mask + template + struct is_mask : std::false_type {}; + + template struct is_mask> : std::true_type {}; + template struct is_mask> : std::true_type {}; + template struct is_mask> : std::true_type {}; + template struct is_mask> : std::true_type {}; + template struct is_mask> : std::true_type {}; + template struct is_mask> : std::true_type {}; + + + /// Define typetraits + template + struct typetraits + { + static const size_t alignment = std::alignment_of::value; + }; + + /// typetraits int8_t + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_INT8_SIZE; + using simd_type = int8; + using simd_mask_type = mask_int8; + static const size_t alignment = fast_size; + }; + /// typetraits uint8_t + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_INT8_SIZE; + using simd_type = uint8; + using simd_mask_type = mask_int8; + static const size_t alignment = fast_size; + }; + + /// typetraits 
int16_t + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_INT16_SIZE; + using simd_type = int16; + using simd_mask_type = mask_int16; + static const size_t alignment = fast_size * 2; + }; + /// typetraits uint16_t + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_INT16_SIZE; + using simd_type = uint16; + using simd_mask_type = mask_int16; + static const size_t alignment = fast_size * 2; + }; + + /// typetraits int32_t + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_INT32_SIZE; + using simd_type = int32; + using simd_mask_type = mask_int32; + static const size_t alignment = fast_size * 4; + }; + /// typetraits uint32_t + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_INT32_SIZE; + using simd_type = uint32; + using simd_mask_type = mask_int32; + static const size_t alignment = fast_size * 4; + }; + + /// typetraits int64_t + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_INT64_SIZE; + using simd_type = int64; + using simd_mask_type = mask_int64; + static const size_t alignment = fast_size * 8; + }; + + /// typetraits uint64_t + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_INT64_SIZE; + using simd_type = uint64; + using simd_mask_type = mask_int64; + static const size_t alignment = fast_size * 8; + }; + + /// typetraits float32 + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_FLOAT32_SIZE; + using simd_type = float32; + using simd_mask_type = mask_float32; + static const size_t alignment = fast_size * 4; + }; + + /// typetraits float64 + template<> + struct typetraits + { + static const size_t fast_size = SIMDPP_FAST_FLOAT64_SIZE; + using simd_type = float64; + using simd_mask_type = mask_float64; + static const size_t alignment = fast_size * 8; + }; + + } // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif diff --git 
a/test/insn/reduce.cc b/test/insn/reduce.cc new file mode 100644 index 00000000..7c108cc7 --- /dev/null +++ b/test/insn/reduce.cc @@ -0,0 +1,134 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + template< typename T> + struct BinaryOpMul + { + public: + BinaryOpMul() {} + SIMDPP_INL T operator()(T const &a0, T const &a1) const noexcept + { + return a0 * a1; + } + + template + SIMDPP_INL U operator()(U const &a0, U const &a1) const noexcept + { + return a0*a1; + } + }; + + template + void test_reduce_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { 0,41 }; + T expected = {42}; + T init = { 1 }; + T res=reduce(ivect.data(), ivect.data() + ivect.size(),init); + TEST_EQUAL(tr, expected,res); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(42, 1); + T expected = { 42 }; + T init = { 0 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, 1); + T expected = { 42 }; + T init = { 0 }; + T res = reduce(ivect.data()+8u, ivect.data() + ivect.size()-100u, init); + TEST_EQUAL(tr, expected, res); + } + } + + template + void test_reducebinop_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto opMul = BinaryOpMul(); + + { //test prologue + vector_t ivect = { 1,42 }; + T expected = { 42 }; + T init = { 1 }; + T neutral = { 1 }; + T res = reduce(ivect.data(), ivect.data() + 
ivect.size(), init,neutral,opMul); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on unaligned vector + vector_t ivect(150, 1); + T expected = { 42 }; + T init = { 42 }; + T neutral = { 1 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init, neutral, opMul); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on aligned vector check mul + vector_aligned_t ivect(10); + std::iota(begin(ivect), end(ivect), (T)1.); + T expected = { 3628800 };//aka 10! + T init = { 1 }; + T neutral = { 1 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init, neutral, opMul); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on aligned vector on range check mul + vector_aligned_t ivect(10); + std::iota(begin(ivect), end(ivect), (T)1.); + T expected = { 840}; //4*5*6*7 + T init = { 1 }; + T neutral = { 1 }; + T res = reduce(ivect.data() + 3, ivect.data() + ivect.size() - 3, init, neutral, opMul); + TEST_EQUAL(tr, expected, res); + } + } + + void test_reduce(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("reduce"); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + //TR Why no operator * for above types at least for uint64_t and int64_t + //test_reducebinop_type(ts, tr); + //test_reducebinop_type(ts, tr); + //test_reducebinop_type(ts, tr); + //test_reducebinop_type(ts, tr); + //test_reducebinop_type(ts, tr); + //test_reducebinop_type(ts, tr); + //test_reducebinop_type(ts, tr); + //test_reducebinop_type(ts, tr); + + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/transform.cc b/test/insn/transform.cc new file mode 100644 index 
00000000..0833d675 --- /dev/null +++ b/test/insn/transform.cc @@ -0,0 +1,170 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + template< typename T> + struct UnaryOpAddValue + { + T m_val; + public: + UnaryOpAddValue(T val) :m_val(val) {} + SIMDPP_INL T operator()(T const &a) const noexcept + { + return m_val + a; + } + + template + SIMDPP_INL U operator()(U const &a) const noexcept + { + return m_val + a; + } + }; + + template< typename T> + struct BinaryOpAdd + { + public: + BinaryOpAdd() {} + SIMDPP_INL T operator()(T const &a0, T const &a1) const noexcept + { + return a0 + a1; + } + + template + SIMDPP_INL U operator()(U const &a0, U const &a1) const noexcept + { + using namespace simdpp; + return a0 + a1; + } + }; + + + template + void test_transform_type_unary(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto opPlusOne = UnaryOpAddValue(1); + { //test prologue + vector_t ivect = { 0,1 }; + vector_t ovect(2); + vector_t expected = { 1,2 }; + + transform(ivect.data(), ivect.data() + ivect.size(), ovect.data(), opPlusOne); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, 0); + vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect.data(), ivect.data() + ivect.size(), ovect.data(), opPlusOne); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, 0); 
+ vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, ovect.data() + 10u, opPlusOne); + for (auto i = 10; i < ovect.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + + + + } + template + void test_transform_type_binary(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_u = std::vector; + using vector_aligned_t = std::vector::alignment>>; + using vector_aligned_u = std::vector::alignment>>; + auto opPlus = BinaryOpAdd(); + { //test prologue + vector_t ivect1 = { 0,1 }; + vector_u ivect2 = { 1,2 }; + vector_t ovect(2); + vector_t expected = { 1,3 }; + + transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect1(150, 0); + vector_aligned_t ivect2(150, 1); + vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + { //test main loop and epilogue on range + vector_aligned_t ivect1(150, 0); + vector_aligned_t ivect2(150, 1); + vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect1.data() + 10u, ivect1.data() + ivect1.size() - 10u, ivect2.data() + 10u, ovect.data() + 10u, opPlus); + for (auto i = 10u; i < ovect.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + } + + void test_transform(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("transform"); + 
test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + + } + +} // namespace SIMDPP_ARCH_NAMESPACE From c679f813b2256d957f6e78896c57d15d3dd6fa1d Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Mon, 26 Feb 2018 19:01:26 +0100 Subject: [PATCH 04/23] issue #107 add fill,copy,copy_n algorithm --- doc/wiki/Main_Page.mwiki | 3 ++ doc/wiki/algorithm/copy.mwiki | 27 +++++++++++ doc/wiki/algorithm/copy_n.mwiki | 28 ++++++++++++ doc/wiki/algorithm/fill.mwiki | 26 +++++++++++ simdpp/algorithm/copy.h | 43 ++++++++++++++++++ simdpp/algorithm/copy_n.h | 29 ++++++++++++ simdpp/algorithm/fill.h | 76 +++++++++++++++++++++++++++++++ simdpp/simd.h | 6 +++ test/CMakeLists.txt | 5 +++ test/insn/copy.cc | 79 +++++++++++++++++++++++++++++++++ test/insn/copy_n.cc | 74 ++++++++++++++++++++++++++++++ test/insn/fill.cc | 71 +++++++++++++++++++++++++++++ test/insn/tests.cc | 61 ++++++++++++++----------- test/insn/tests.h | 43 ++++++++++-------- 14 files changed, 525 insertions(+), 46 deletions(-) create mode 100644 doc/wiki/algorithm/copy.mwiki create mode 100644 doc/wiki/algorithm/copy_n.mwiki create mode 100644 doc/wiki/algorithm/fill.mwiki create mode 100644 simdpp/algorithm/copy.h create mode 100644 simdpp/algorithm/copy_n.h create mode 100644 simdpp/algorithm/fill.h create mode 100644 
test/insn/copy.cc create mode 100644 test/insn/copy_n.cc create mode 100644 test/insn/fill.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index 246b651e..3a812ff2 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -177,6 +177,9 @@ the compiler will generate.
{{ltt|algorithm/transform}}
{{ltt|algorithm/reduce}}
+{{ltt|algorithm/fill}}
+{{ltt|algorithm/copy}}
+{{ltt|algorithm/copy_n}}
|- class="row rowbottom" diff --git a/doc/wiki/algorithm/copy.mwiki b/doc/wiki/algorithm/copy.mwiki new file mode 100644 index 00000000..31c21712 --- /dev/null +++ b/doc/wiki/algorithm/copy.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|copy}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T* copy(T const* first, T const* last, T* out) +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|copy}} Copies the elements in the range, defined by {{tt|[first, last)}}, to another range beginning at {{tt|out}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to copy}} +{{par | out | the beginning of the destination range.}} +{{par end}} + +===Return value=== +Output address to the element in the destination range, one past the last element copied. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc fill}} +{{dsc inc | algorithm/dsc copy_n}} +{{dsc end}} diff --git a/doc/wiki/algorithm/copy_n.mwiki b/doc/wiki/algorithm/copy_n.mwiki new file mode 100644 index 00000000..ad0e73c1 --- /dev/null +++ b/doc/wiki/algorithm/copy_n.mwiki @@ -0,0 +1,28 @@ +{{simdpp/title|copy_n}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T* copy_n(T const* first, Size n, T* out) +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|copy_n}} Copies exactly {{tt|n}} values from the range beginning at {{tt|first}} to the range beginning at {{tt|out}}, if {{tt|n>0}}. Does nothing otherwise. + +===Parameters=== +{{par begin}} +{{par | first | the beginning of the range of elements to copy from}} +{{par | n | number of the elements to copy}} +{{par | out | the beginning of the destination range}} +{{par end}} + +===Return value=== +Output address in the destination range, pointing past the last element copied if {{tt|n>0}} or {{tt|out}} otherwise.
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc fill}} +{{dsc inc | algorithm/dsc copy}} +{{dsc end}} diff --git a/doc/wiki/algorithm/fill.mwiki b/doc/wiki/algorithm/fill.mwiki new file mode 100644 index 00000000..46648738 --- /dev/null +++ b/doc/wiki/algorithm/fill.mwiki @@ -0,0 +1,26 @@ +{{simdpp/title|fill}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + void fill(T* first, T* last, U value) +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|fill}} Assigns the given {{tt|value}} to the elements in the range {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to modify}} +{{par | value | the value to be assigned}} +{{par end}} + +===Return value=== +(none) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc copy}} +{{dsc end}} diff --git a/simdpp/algorithm/copy.h b/simdpp/algorithm/copy.h new file mode 100644 index 00000000..403ecf87 --- /dev/null +++ b/simdpp/algorithm/copy.h @@ -0,0 +1,43 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_COPY_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_COPY_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + T* copy(T const* first, T const* last, T* out) + { + struct UnaryOpCopy + { + using simd_type_T = typetraits::simd_type; + SIMDPP_INL T operator()(T const &a) const noexcept + { + return a; + } + + SIMDPP_INL simd_type_T operator()(simd_type_T const &a) const noexcept + { + return a; + } + }; + + return transform(first, last, out, UnaryOpCopy{}); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_COPY_H diff --git a/simdpp/algorithm/copy_n.h b/simdpp/algorithm/copy_n.h new file mode 100644 index 00000000..4999b489 --- /dev/null +++ b/simdpp/algorithm/copy_n.h @@ -0,0 +1,29 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_COPY_N_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_COPY_N_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template T* copy_n(T const* first, Size n, T* out) + { + if (n <= Size(0)) return out; + return copy(first, first + n, out); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_COPY_N_H diff --git a/simdpp/algorithm/fill.h b/simdpp/algorithm/fill.h new file mode 100644 index 00000000..f3edec36 --- /dev/null +++ b/simdpp/algorithm/fill.h @@ -0,0 +1,76 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + void fill(T* first, T* last, U value) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("fill - null ptr first."); + if (!last) + throw std::runtime_error("fill - null ptr last."); +#endif + using simd_type_T = typetraits::simd_type; + const auto alignment = typetraits::alignment; + + simd_type_T valsimd = splat((T)value); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + + //---prologue + for (; i < size_prologue_loop; ++i) + { + *first++=(T)value; + } + + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + store(first, valsimd); + first += simd_size; + } + + + //---epilogue + for (; i < size; ++i) + { + *first++ = (T)value; + } + + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H \ No newline at end of file diff --git a/simdpp/simd.h b/simdpp/simd.h index afda9846..31a67386 100644 --- a/simdpp/simd.h +++ b/simdpp/simd.h @@ -196,6 +196,12 @@ #include #include +#include +#include +#include +#include +#include + /** @def SIMDPP_NO_DISPATCHER Disables internal dispatching functionality. If the internal dispathcher mechanism is not needed, the user can define the @c SIMDPP_NO_DISPATCHER. 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ecf2fc6f..881d2914 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -63,7 +63,10 @@ set(TEST_INSN_ARCH_SOURCES insn/compare.cc insn/construct.cc insn/convert.cc + insn/copy.cc + insn/copy_n.cc insn/for_each.cc + insn/fill.cc insn/math_fp.cc insn/math_int.cc insn/math_shift.cc @@ -72,9 +75,11 @@ set(TEST_INSN_ARCH_SOURCES insn/shuffle.cc insn/shuffle_bytes.cc insn/permute_generic.cc + insn/reduce.cc insn/shuffle_generic.cc insn/test_utils.cc insn/tests.cc + insn/transform.cc insn/transpose.cc ) diff --git a/test/insn/copy.cc b/test/insn/copy.cc new file mode 100644 index 00000000..f11cd630 --- /dev/null +++ b/test/insn/copy.cc @@ -0,0 +1,79 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + + +namespace SIMDPP_ARCH_NAMESPACE { + + template + void test_copy_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; + + copy(ivect.data(), ivect.data() + ivect.size(), ovect.data()); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ivect[i], ovect[i]); + } + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, (T)42); + vector_t ovect(150, (T)0); + + copy(ivect.data(), ivect.data() + ivect.size(), ovect.data()); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ovect[i], ivect[i]); + } + } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, (T)42); + vector_t ovect(150, (T)0); + + copy(ivect.data() + 10u, 
ivect.data() + ivect.size() - 10u, ovect.data()+10u); + for (auto i = 0u; i < 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ovect[i], (T)0); + } + for (auto i = 10; i < ovect.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ovect[i], ivect[i]); + } + for (auto i = ovect.size() - 10u; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ovect[i], (T)0); + } + } + } + + void test_copy(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("copy"); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/copy_n.cc b/test/insn/copy_n.cc new file mode 100644 index 00000000..343ee352 --- /dev/null +++ b/test/insn/copy_n.cc @@ -0,0 +1,74 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + + +namespace SIMDPP_ARCH_NAMESPACE { + + template + void test_copy_n_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; + + copy_n(ivect.data(),2, ovect.data()); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ivect[i], ovect[i]); + } + } + { //test negative don't change ovect + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; + + copy_n(ivect.data(), -2, ovect.data()); + + TEST_EQUAL(tr, (T)0, ovect[0]); + TEST_EQUAL(tr, (T)0, ovect[1]); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, (T)42); + vector_t ovect(150, (T)0); + + copy_n(ivect.data(),100, ovect.data()); + for (auto i = 0; i < 100; ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ovect[i], ivect[i]); + } + for (auto i = 100; i (ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/fill.cc b/test/insn/fill.cc new file mode 100644 index 00000000..1796a85f --- /dev/null +++ b/test/insn/fill.cc @@ -0,0 +1,71 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + + +namespace SIMDPP_ARCH_NAMESPACE { + + template + void test_fill_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)0,(T)1 }; + vector_t expected = { (T)42,(T)42 }; + + fill(ivect.data(), ivect.data() + ivect.size(), (T)42); + for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ivect[i]); + } + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, (T)0); + vector_t expected(150, (T)42); + + fill(ivect.data(), ivect.data() + ivect.size(), (T)42); + for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ivect[i]); + } + } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, (T)0); + vector_t expected(150, (T)42); + + fill(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, (T)42); + for (auto i = 10; i < expected.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ivect[i]); + } + } + } + + void test_fill(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("fill"); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/tests.cc b/test/insn/tests.cc index 1dfe6978..ced16636 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -46,32 +46,39 @@ 
static_assert(sizeof(simdpp::float64<8>) == 64, "Incorrect vector size"); namespace SIMDPP_ARCH_NAMESPACE { -void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& opts) -{ - test_test_utils(res); - - // Tests are ordered in such a way so that base functionality that other - // tests depend on is tested first. - test_construct(res); - test_memory_load(res, tr); - test_memory_store(res, tr); - - test_blend(res); - test_bitwise(res, tr); - test_permute_generic(res); - test_shuffle_generic(res); - test_shuffle(res); - test_shuffle_bytes(res, tr); - - test_convert(res); - test_math_fp(res, opts); - test_math_int(res); - test_compare(res); - test_math_shift(res); - test_transpose(res); - - test_for_each(res, tr); -} + void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& opts) + { + test_test_utils(res); + + // Tests are ordered in such a way so that base functionality that other + // tests depend on is tested first. + test_construct(res); + test_memory_load(res, tr); + test_memory_store(res, tr); + + test_blend(res); + test_bitwise(res, tr); + test_permute_generic(res); + test_shuffle_generic(res); + test_shuffle(res); + test_shuffle_bytes(res, tr); + + test_convert(res); + test_math_fp(res, opts); + test_math_int(res); + test_compare(res); + test_math_shift(res); + test_transpose(res); + + test_for_each(res, tr); + + //algorithm + test_transform(res, tr); + test_reduce(res, tr); + test_copy(res,tr); + test_copy_n(res,tr); + test_fill(res,tr); + } } // namespace SIMDPP_ARCH_NAMESPACE /* TODO: here we use dispatcher only to register the available functions, not @@ -93,7 +100,7 @@ std::vector get_test_archs() using FunPtr = void(*)(TestResults&, TestReporter&, const TestOptions&); SIMDPP_DISPATCH_COLLECT_FUNCTIONS(versions, main_test_function, FunPtr) std::vector result; - result.assign(versions, versions+SIMDPP_DISPATCH_MAX_ARCHS); + result.assign(versions, versions + SIMDPP_DISPATCH_MAX_ARCHS); return result; } #endif 
diff --git a/test/insn/tests.h b/test/insn/tests.h index 5fa1ce0a..0160b22e 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -17,25 +17,30 @@ namespace SIMDPP_ARCH_NAMESPACE { void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& opts); -void test_bitwise(TestResults& res, TestReporter& tr); -void test_blend(TestResults& res); -void test_compare(TestResults& res); -void test_convert(TestResults& res); -void test_construct(TestResults& res); -void test_for_each(TestResults& res, TestReporter& tr); -void test_math_fp(TestResults& res, const TestOptions& opts); -void test_math_int(TestResults& res); -void test_math_shift(TestResults& res); -void test_memory_load(TestResults& res, TestReporter& tr); -void test_memory_store(TestResults& res, TestReporter& tr); -void test_set(TestResults& res); -void test_shuffle(TestResults& res); -void test_shuffle_bytes(TestResults& res, TestReporter& tr); -void test_shuffle_generic(TestResults& res); -void test_permute_generic(TestResults& res); -void test_shuffle_transpose(TestResults& res); -void test_test_utils(TestResults& res); -void test_transpose(TestResults& res); + void test_bitwise(TestResults& res, TestReporter& tr); + void test_blend(TestResults& res); + void test_compare(TestResults& res); + void test_convert(TestResults& res); + void test_construct(TestResults& res); + void test_copy(TestResults& res, TestReporter& tr); + void test_copy_n(TestResults& res, TestReporter& tr); + void test_for_each(TestResults& res, TestReporter& tr); + void test_fill(TestResults& res, TestReporter& tr); + void test_math_fp(TestResults& res, const TestOptions& opts); + void test_math_int(TestResults& res); + void test_math_shift(TestResults& res); + void test_memory_load(TestResults& res, TestReporter& tr); + void test_memory_store(TestResults& res, TestReporter& tr); + void test_set(TestResults& res); + void test_shuffle(TestResults& res); + void test_shuffle_bytes(TestResults& res, TestReporter& tr); + 
void test_shuffle_generic(TestResults& res); + void test_permute_generic(TestResults& res); + void test_reduce(TestResults& res, TestReporter& tr); + void test_shuffle_transpose(TestResults& res); + void test_test_utils(TestResults& res); + void test_transpose(TestResults& res); + void test_transform(TestResults& res, TestReporter& tr); } // namespace SIMDPP_ARCH_NAMESPACE From 4bc2c6346d14d640c3410b1b03bbfc0e25607d1a Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Wed, 28 Feb 2018 13:39:15 +0100 Subject: [PATCH 05/23] issue #107 gcc compil fix --- simdpp/algorithm/copy.h | 2 +- simdpp/algorithm/fill.h | 4 ++-- simdpp/algorithm/helper_input_range.h | 2 +- simdpp/algorithm/reduce.h | 4 ++-- simdpp/algorithm/transform.h | 10 +++++----- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/simdpp/algorithm/copy.h b/simdpp/algorithm/copy.h index 403ecf87..dc505866 100644 --- a/simdpp/algorithm/copy.h +++ b/simdpp/algorithm/copy.h @@ -22,7 +22,7 @@ namespace simdpp { { struct UnaryOpCopy { - using simd_type_T = typetraits::simd_type; + using simd_type_T = typename typetraits::simd_type; SIMDPP_INL T operator()(T const &a) const noexcept { return a; diff --git a/simdpp/algorithm/fill.h b/simdpp/algorithm/fill.h index f3edec36..96b8ba04 100644 --- a/simdpp/algorithm/fill.h +++ b/simdpp/algorithm/fill.h @@ -33,7 +33,7 @@ namespace simdpp { if (!last) throw std::runtime_error("fill - null ptr last."); #endif - using simd_type_T = typetraits::simd_type; + using simd_type_T = typename typetraits::simd_type; const auto alignment = typetraits::alignment; simd_type_T valsimd = splat((T)value); @@ -73,4 +73,4 @@ namespace simdpp { } // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp -#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H \ No newline at end of file +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H diff --git a/simdpp/algorithm/helper_input_range.h b/simdpp/algorithm/helper_input_range.h index b4c81468..e12217a5 100644 --- a/simdpp/algorithm/helper_input_range.h +++ 
b/simdpp/algorithm/helper_input_range.h @@ -35,7 +35,7 @@ namespace simdpp { if (!last) throw std::runtime_error("helper_input_range - null ptr last."); #endif - using simd_type_T = typetraits::simd_type; + using simd_type_T = typename typetraits::simd_type; const auto simd_size = simd_type_T::base_length; const auto alignment = typetraits::alignment; diff --git a/simdpp/algorithm/reduce.h b/simdpp/algorithm/reduce.h index 19978957..c3d909cf 100644 --- a/simdpp/algorithm/reduce.h +++ b/simdpp/algorithm/reduce.h @@ -32,7 +32,7 @@ namespace simdpp { if (!last) throw std::runtime_error("reduce - null ptr last."); #endif - using simd_type_T = typetraits::simd_type; + using simd_type_T = typename typetraits::simd_type; const auto alignment = typetraits::alignment; simd_type_T accusimd = splat((T)0); @@ -80,7 +80,7 @@ namespace simdpp { if (!last) throw std::runtime_error("reduce - null ptr last."); #endif - using simd_type_T = typetraits::simd_type; + using simd_type_T = typename typetraits::simd_type; const auto alignment = typetraits::alignment; //Define loop counter diff --git a/simdpp/algorithm/transform.h b/simdpp/algorithm/transform.h index 54d3a7e6..b607f47a 100644 --- a/simdpp/algorithm/transform.h +++ b/simdpp/algorithm/transform.h @@ -35,8 +35,8 @@ namespace simdpp { if (!out) throw std::runtime_error("transform - null ptr out."); #endif - using simd_type_T = typetraits::simd_type; - using simd_type_U = typetraits::simd_type; + using simd_type_T = typename typetraits::simd_type; + using simd_type_U = typename typetraits::simd_type; static_assert (simd_type_T::base_length == simd_type_U::base_length , "mismatch base_length between T and U" @@ -101,9 +101,9 @@ namespace simdpp { if (!out) throw std::runtime_error("transform - null ptr out."); #endif - using simd_type_T1 = typetraits::simd_type; - using simd_type_T2 = typetraits::simd_type; - using simd_type_U = typetraits::simd_type; + using simd_type_T1 = typename typetraits::simd_type; + using simd_type_T2 = 
typename typetraits::simd_type; + using simd_type_U = typename typetraits::simd_type; static_assert (simd_type_T1::base_length == simd_type_T2::base_length , "mismatch base_length between T1 and T2" From 22e53578f81fa1de4575524848d8dfc69929e494 Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Mon, 5 Mar 2018 02:34:07 +0100 Subject: [PATCH 06/23] issue #107 add search max/min --- doc/wiki/Main_Page.mwiki | 8 +- doc/wiki/algorithm/max.mwiki | 29 +++++++ doc/wiki/algorithm/min.mwiki | 29 +++++++ simdpp/algorithm/max.h | 139 ++++++++++++++++++++++++++++++++++ simdpp/algorithm/min.h | 141 +++++++++++++++++++++++++++++++++++ simdpp/simd.h | 2 + test/insn/max.cc | 105 ++++++++++++++++++++++++++ test/insn/min.cc | 104 ++++++++++++++++++++++++++ 8 files changed, 554 insertions(+), 3 deletions(-) create mode 100644 doc/wiki/algorithm/max.mwiki create mode 100644 doc/wiki/algorithm/min.mwiki create mode 100644 simdpp/algorithm/max.h create mode 100644 simdpp/algorithm/min.h create mode 100644 test/insn/max.cc create mode 100644 test/insn/min.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index 3a812ff2..c3e9a732 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -175,11 +175,13 @@ the compiler will generate. '''[[Algorithm | STL like algorithm]]'''
-{{ltt|algorithm/transform}}
-{{ltt|algorithm/reduce}}
-{{ltt|algorithm/fill}}
{{ltt|algorithm/copy}}
{{ltt|algorithm/copy_n}}
+{{ltt|algorithm/fill}}
+{{ltt|algorithm/max}}
+{{ltt|algorithm/min}}
+{{ltt|algorithm/transform}}
+{{ltt|algorithm/reduce}}
|- class="row rowbottom" diff --git a/doc/wiki/algorithm/max.mwiki b/doc/wiki/algorithm/max.mwiki new file mode 100644 index 00000000..512a1fc8 --- /dev/null +++ b/doc/wiki/algorithm/max.mwiki @@ -0,0 +1,29 @@ +{{simdpp/title|max}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | +template + T max(T const* first, T const* last, Comp comp); +}} +{{dcl | num=2 | +template + T max(T const* first, T const* last); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|max}} Returns the value of the largest element in the range [first, last[ according to comp (or simply the maximum if no comp is given). If the range is empty, the lowest possible value for the order is returned. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of processed elements}} +{{par | comp | binary comparison operator}} +{{par end}} + +===Return value=== +(max) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc min}} +{{dsc end}} diff --git a/doc/wiki/algorithm/min.mwiki b/doc/wiki/algorithm/min.mwiki new file mode 100644 index 00000000..19fc0fac --- /dev/null +++ b/doc/wiki/algorithm/min.mwiki @@ -0,0 +1,29 @@ +{{simdpp/title|min}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | +template + T min(T const* first, T const* last, Comp comp); +}} +{{dcl | num=2 | +template + T min(T const* first, T const* last); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|min}} Returns the value of the smallest element in the range [first, last[ according to comp (or simply the minimum if no comp is given). If the range is empty, the greatest possible value for the order is returned.
+ +===Parameters=== +{{par begin}} +{{par | first, last | the range of processed elements}} +{{par | comp | binary comparison operator}} +{{par end}} + +===Return value=== +(min) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc max}} +{{dsc end}} \ No newline at end of file diff --git a/simdpp/algorithm/max.h b/simdpp/algorithm/max.h new file mode 100644 index 00000000..9a3dad4f --- /dev/null +++ b/simdpp/algorithm/max.h @@ -0,0 +1,139 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_MAX_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_MAX_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //numeric_limits +#include //max +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + //Returns the value of the element with the largest value in the range[first, last[ over comp, + //The lowest possible value for the order if the range is empty. + template + T max(T const* first, T const* last, Comp comp) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("max - null ptr first."); + if (!last) + throw std::runtime_error("max - null ptr last."); +#endif + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + + if (first == last) return comp(T(0), T(1)) ? 
std::numeric_limits::max() : std::numeric_limits::lowest(); //stolen from boost::simd + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto max_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + if (comp(*first, max_val)) + { + max_val = *first; + } + first++; + } + + //---main simd loop + simd_type_T current_max_simd = splat(max_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element = load(first); + const simd_mask_T mask = comp(current_max_simd, element); + current_max_simd = blend(current_max_simd, element, mask); + first += simd_size; + } + //extract max from simdtype + for_each(current_max_simd, [&](T el) {if (comp(el, max_val)) { max_val = el; }}); + + //---epilogue + for (; i < size; ++i) + { + if (comp(*first, max_val)) + { + max_val = *first; + } + first++; + } + return max_val; + } + + //Returns the value of the element with the largest value in the range[first, last[, + //The lowest possible value for the order if the range is empty. 
+ template + T max(T const* first, T const* last) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("max - null ptr first."); + if (!last) + throw std::runtime_error("max - null ptr last."); +#endif + using simd_type_T = typename typetraits::simd_type; + + if (first == last) return std::numeric_limits::lowest(); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto max_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + max_val = std::max(max_val, *first++); + } + //---main simd loop + simd_type_T current_max_simd = splat(max_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(first); + current_max_simd = max(current_max_simd, el); + first += simd_size; + } + //extract max from simdtype + max_val = reduce_max(current_max_simd); + + //---epilogue + for (; i < size; ++i) + { + max_val = std::max(max_val, *first++); + } + + return max_val; + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_MAX_H diff --git a/simdpp/algorithm/min.h b/simdpp/algorithm/min.h new file mode 100644 index 00000000..edfaf7f5 --- /dev/null +++ b/simdpp/algorithm/min.h @@ -0,0 +1,141 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_MIN_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_MIN_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //numeric_limits +#include //min +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + //Returns the value of the element with the smallest value in the range[first, last[ over comp, + //The largest possible value for the order if the range is empty. + template + T min(T const* first, T const* last, Comp comp) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("min - null ptr first."); + if (!last) + throw std::runtime_error("min - null ptr last."); +#endif + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + + if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::lowest() : std::numeric_limits::max(); //stolen from boost::simd + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto min_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + if (comp(min_val, *first)) + { + min_val = *first; + } + first++; + } + + //---main simd loop + simd_type_T current_min_simd = splat(min_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element = load(first); + const simd_mask_T mask = comp(element, current_min_simd); + current_min_simd = blend(current_min_simd, element, mask); + first += simd_size; + } + //extract min from simdtype + 
for_each(current_min_simd, [&](T el) {if (comp(min_val, el)) { min_val = el; }}); + + //---epilogue + for (; i < size; ++i) + { + if (comp(min_val, *first)) + { + min_val = *first; + } + first++; + } + return min_val; + } + + //Returns the value of the element with the smallest value in the range[first, last[, + //The largest possible value for the order if the range is empty. + template + T min(T const* first, T const* last) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("min - null ptr first."); + if (!last) + throw std::runtime_error("min - null ptr last."); +#endif + using simd_type_T = typename typetraits::simd_type; + + if (first == last) return std::numeric_limits::max(); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto min_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + min_val = std::min(min_val, *first++); + } + + //---main simd loop + simd_type_T current_min_simd = splat(min_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(first); + current_min_simd = min(el, current_min_simd); + first += simd_size; + } + //extract min from simdtype + min_val = reduce_min(current_min_simd); + + //---epilogue + for (; i < size; ++i) + { + min_val = std::min(min_val, *first++); + } + + return min_val; + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_MIN_H diff --git a/simdpp/simd.h b/simdpp/simd.h index 31a67386..f8ac6fd7 100644 --- a/simdpp/simd.h +++ b/simdpp/simd.h @@ -199,6 +199,8 @@ #include #include #include +#include +#include #include #include diff --git 
a/test/insn/max.cc b/test/insn/max.cc new file mode 100644 index 00000000..6bd7344c --- /dev/null +++ b/test/insn/max.cc @@ -0,0 +1,105 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct binary_cmp_greater + { + public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + SIMDPP_INL bool operator()(T a, T b) { return a > b; } + + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } + }; + + template + void test_max_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::lowest(), max(ivect.data() + ivect.size(), ivect.data() + ivect.size())); + } + } + {//test max with comp op + { //test prologue + 
vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::lowest(), max(ivect.data() + ivect.size(), ivect.data() + ivect.size(), cmpOPGreater)); + } + } + + } + + void test_max(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("max"); + test_max_type(ts, tr); + test_max_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/min.cc b/test/insn/min.cc new file mode 100644 index 00000000..9eef179e --- /dev/null +++ b/test/insn/min.cc @@ -0,0 +1,104 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct binary_cmp_greater + { + public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + SIMDPP_INL bool operator()(T a, T b) { return a > b; } + + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } + }; + + template + void test_min_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical min + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::max(), min(ivect.data() + ivect.size(), ivect.data() + ivect.size())); + } + } + {//test min with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), 
end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::max(), min(ivect.data() + ivect.size(), ivect.data() + ivect.size(), cmpOPGreater)); + } + } + + } + + void test_min(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("min"); + test_min_type(ts, tr); + test_min_type(ts, tr); + //test_min_type(ts, tr); //FIXME + //test_min_type(ts, tr); //FIXME + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE From 5c48da0aaf1f40fd03d9f87a3d2361794c44e3ef Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Mon, 5 Mar 2018 12:51:38 +0100 Subject: [PATCH 07/23] issue #107 add find,find_if,find_if_not --- doc/wiki/Main_Page.mwiki | 3 + doc/wiki/algorithm/find.mwiki | 27 +++++++++ doc/wiki/algorithm/find_if.mwiki | 27 +++++++++ doc/wiki/algorithm/find_if_not.mwiki | 27 +++++++++ simdpp/algorithm/find.h | 88 ++++++++++++++++++++++++++++ simdpp/algorithm/find_if.h | 73 +++++++++++++++++++++++ simdpp/algorithm/find_if_not.h | 73 +++++++++++++++++++++++ simdpp/simd.h | 4 ++ test/CMakeLists.txt | 5 ++ test/insn/find.cc | 59 +++++++++++++++++++ test/insn/find_if.cc | 78 ++++++++++++++++++++++++ test/insn/find_if_not.cc | 78 ++++++++++++++++++++++++ test/insn/tests.cc | 5 ++ test/insn/tests.h | 5 ++ 14 files changed, 552 insertions(+) create mode 100644 doc/wiki/algorithm/find.mwiki create mode 100644 doc/wiki/algorithm/find_if.mwiki create mode 100644 
doc/wiki/algorithm/find_if_not.mwiki create mode 100644 simdpp/algorithm/find.h create mode 100644 simdpp/algorithm/find_if.h create mode 100644 simdpp/algorithm/find_if_not.h create mode 100644 test/insn/find.cc create mode 100644 test/insn/find_if.cc create mode 100644 test/insn/find_if_not.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index c3e9a732..e8177f0e 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -178,6 +178,9 @@ the compiler will generate. {{ltt|algorithm/copy}}
{{ltt|algorithm/copy_n}}
{{ltt|algorithm/fill}}
+{{ltt|algorithm/find}}
+{{ltt|algorithm/find_if}}
+{{ltt|algorithm/find_if_not}}
{{ltt|algorithm/max}}
{{ltt|algorithm/min}}
{{ltt|algorithm/transform}}
diff --git a/doc/wiki/algorithm/find.mwiki b/doc/wiki/algorithm/find.mwiki new file mode 100644 index 00000000..e7b2489d --- /dev/null +++ b/doc/wiki/algorithm/find.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|find}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T const* find(T const* first, T const* last, U val); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|find}} Returns the first element in the range [first, last[ that equals val. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | val | value to compare the elements to.}} +{{par end}} + +===Return value=== +Address of the first element equal to val, or last if no such element is found. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc find_if}} +{{dsc inc | algorithm/dsc find_if_not}} +{{dsc end}} diff --git a/doc/wiki/algorithm/find_if.mwiki b/doc/wiki/algorithm/find_if.mwiki new file mode 100644 index 00000000..39c048ce --- /dev/null +++ b/doc/wiki/algorithm/find_if.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|find_if}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T const* find_if(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|find_if}} Returns the first element in the range [first, last[ that satisfies the predicate pred. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate which returns {{true}} for the required element.}} +{{par end}} + +===Return value=== +Address of the first element satisfying the predicate, or last if no such element is found.
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc find}} +{{dsc inc | algorithm/dsc find_if_not}} +{{dsc end}} diff --git a/doc/wiki/algorithm/find_if_not.mwiki b/doc/wiki/algorithm/find_if_not.mwiki new file mode 100644 index 00000000..997808cc --- /dev/null +++ b/doc/wiki/algorithm/find_if_not.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|find_if_not}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + T const* find_if_not(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|find_if_not}} Returns the first element in the range [first, last[ that does {{not}} satisfy the predicate pred. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate which returns {{false}} for the required element.}} +{{par end}} + +===Return value=== +Address of the first element for which pred returns false, or last if no such element is found. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc find}} +{{dsc inc | algorithm/dsc find_if}} +{{dsc end}} diff --git a/simdpp/algorithm/find.h b/simdpp/algorithm/find.h new file mode 100644 index 00000000..38ada3e0 --- /dev/null +++ b/simdpp/algorithm/find.h @@ -0,0 +1,88 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0.
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //find +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + T const* find(T const* first, T const* last, U val) + { + struct UnaryPredicateEqualValue + { + public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + private: + T m_val; + simd_type_T m_val_simd; + }; + +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("find - null ptr first."); + if (!last) + throw std::runtime_error("find - null ptr last."); +#endif + using simd_type_T = typename simdpp::typetraits::simd_type; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + + if (first == last) return last; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = simdpp::helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find(first, lastprologue, val); + if (resprologue != lastprologue) return resprologue; + + //simd loop + auto i = size_prologue_loop; + + //workaraund not test_bits_any for mask type + const simd_type_T on = simdpp::splat(T(1)); + const simd_type_T off = simdpp::splat(T(0)); + const auto pred = 
UnaryPredicateEqualValue(val); + for (; i < size_simd_loop; i += simd_size) + { + //TR why can't test_bits_any not available for mask? + const auto res = simdpp::blend(on, off, pred(simdpp::load(lastprologue))); + if (simdpp::test_bits_any(res)) //match extract exact position + { + return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? + } + lastprologue += simd_size; + } + + //epilogue + return std::find(lastprologue, last, val); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H \ No newline at end of file diff --git a/simdpp/algorithm/find_if.h b/simdpp/algorithm/find_if.h new file mode 100644 index 00000000..c57e2fec --- /dev/null +++ b/simdpp/algorithm/find_if.h @@ -0,0 +1,73 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //find_if +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + T const* find_if(T const* first, T const* last, UnaryPredicate pred) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("find_if - null ptr first."); + if (!last) + throw std::runtime_error("find_if - null ptr last."); +#endif + using simd_type_T = typename simdpp::typetraits::simd_type; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + + if (first == last) return last; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = simdpp::helper_input_range(first, last); + const 
auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find_if(first, lastprologue, pred); + if (resprologue != lastprologue) return resprologue; + + //simd loop + auto i = size_prologue_loop; + + //workaraund not test_bits_any for mask type + const simd_type_T on = simdpp::splat(T(1)); + const simd_type_T off = simdpp::splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + //TR why can't test_bits_any not available for mask? + const auto res = simdpp::blend(on, off, pred(simdpp::load(lastprologue))); + if (simdpp::test_bits_any(res)) //match extract exact position + { + return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? + } + lastprologue += simd_size; + } + + //epilogue + return std::find_if(lastprologue, last, pred); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H \ No newline at end of file diff --git a/simdpp/algorithm/find_if_not.h b/simdpp/algorithm/find_if_not.h new file mode 100644 index 00000000..5982356c --- /dev/null +++ b/simdpp/algorithm/find_if_not.h @@ -0,0 +1,73 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //find_if +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + T const* find_if_not(T const* first, T const* last, UnaryPredicate pred) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("find_if - null ptr first."); + if (!last) + throw std::runtime_error("find_if - null ptr last."); +#endif + using simd_type_T = typename simdpp::typetraits::simd_type; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + + if (first == last) return last; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = simdpp::helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find_if_not(first, lastprologue, pred); + if (resprologue != lastprologue) return resprologue; + + //simd loop + auto i = size_prologue_loop; + + //workaraund not reduce_and for mask type + const simd_type_T on = simdpp::splat(T(1)); + const simd_type_T off = simdpp::splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + //TR why can't reduce_and not available for mask? + const auto res = simdpp::blend(on, off, pred(simdpp::load(lastprologue))); + if (!simdpp::reduce_and(res)) //match extract exact position + { + return std::find_if_not(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? 
+ } + lastprologue += simd_size; + } + + //epilogue + return std::find_if_not(lastprologue, last, pred); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H \ No newline at end of file diff --git a/simdpp/simd.h b/simdpp/simd.h index f8ac6fd7..8e367ea2 100644 --- a/simdpp/simd.h +++ b/simdpp/simd.h @@ -196,9 +196,13 @@ #include #include +//algorithm #include #include #include +#include +#include +#include #include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 881d2914..4731e371 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -67,11 +67,16 @@ set(TEST_INSN_ARCH_SOURCES insn/copy_n.cc insn/for_each.cc insn/fill.cc + insn/find.cc + insn/find_if.cc + insn/find_if_not.cc insn/math_fp.cc insn/math_int.cc insn/math_shift.cc + insn/max.cc insn/memory_load.cc insn/memory_store.cc + insn/min.cc insn/shuffle.cc insn/shuffle_bytes.cc insn/permute_generic.cc diff --git a/test/insn/find.cc b/test/insn/find.cc new file mode 100644 index 00000000..8759f558 --- /dev/null +++ b/test/insn/find.cc @@ -0,0 +1,59 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +namespace SIMDPP_ARCH_NAMESPACE { + + template + void test_find_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)3); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)3); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)98); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)98); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)50); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)50); + TEST_EQUAL(tr, *resstd, *res); + } + } + + void test_find(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("find"); + test_find_type(ts, tr); + test_find_type(ts, tr); + // test_find_type(ts, tr); //FIXME + // test_find_type(ts, tr); //FIXME + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/find_if.cc b/test/insn/find_if.cc new file mode 100644 index 00000000..8cc834b4 --- /dev/null +++ b/test/insn/find_if.cc @@ -0,0 +1,78 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct UnaryPredicateSupValue + { + public: + UnaryPredicateSupValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a) const { return a > m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_gt(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; + }; + + template + void test_find_if_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + const auto SupThree = UnaryPredicateSupValue((T)3); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 3; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), SupThree); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateSupValue((T)98); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 98; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateSupValue((T)50); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 50; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + } + + void test_find_if(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + 
TestResultsSet& ts = res.new_results_set("find_if"); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + //test_find_if_type(ts, tr); //FIXME + //test_find_if_type(ts, tr); //FIXME + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/find_if_not.cc b/test/insn/find_if_not.cc new file mode 100644 index 00000000..3a44274f --- /dev/null +++ b/test/insn/find_if_not.cc @@ -0,0 +1,78 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct UnaryPredicateInfValue + { + public: + UnaryPredicateInfValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a) const { return a < m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_lt(a, m_val_simd); } + private: + T m_val; + simd_type_T m_val_simd; + }; + + template + void test_find_if_not_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + const auto SupThree = UnaryPredicateInfValue((T)3); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 3; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), SupThree); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + 
std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateInfValue((T)98); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 98; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateInfValue((T)50); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 50; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + } + + void test_find_if_not(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("find_if_not"); + //test_find_if_not_type(ts, tr); //FIXME missing reduce and + //test_find_if_not_type(ts, tr); //FIXME missing reduce and + //test_find_if_not_type(ts, tr); //FIXME + //test_find_if_not_type(ts, tr); //FIXME + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/tests.cc b/test/insn/tests.cc index ced16636..bb8e73f8 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -78,6 +78,11 @@ namespace SIMDPP_ARCH_NAMESPACE { test_copy(res,tr); test_copy_n(res,tr); test_fill(res,tr); + test_find(res, tr); + test_find_if(res, tr); + test_find_if_not(res, tr); + test_max(res,tr); + test_min(res,tr); } } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/tests.h b/test/insn/tests.h index 0160b22e..3658f9ac 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -26,11 +26,16 @@ void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& o void test_copy_n(TestResults& res, TestReporter& tr); void 
test_for_each(TestResults& res, TestReporter& tr); void test_fill(TestResults& res, TestReporter& tr); + void test_find(TestResults& res, TestReporter& tr); + void test_find_if(TestResults& res, TestReporter& tr); + void test_find_if_not(TestResults& res, TestReporter& tr); void test_math_fp(TestResults& res, const TestOptions& opts); void test_math_int(TestResults& res); void test_math_shift(TestResults& res); + void test_max(TestResults& res, TestReporter& tr); void test_memory_load(TestResults& res, TestReporter& tr); void test_memory_store(TestResults& res, TestReporter& tr); + void test_min(TestResults& res, TestReporter& tr); void test_set(TestResults& res); void test_shuffle(TestResults& res); void test_shuffle_bytes(TestResults& res, TestReporter& tr); From ab3e92b62cbab6178cfbb68699ea68a29fc617dd Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Mon, 5 Mar 2018 17:45:58 +0100 Subject: [PATCH 08/23] issue #107 fix gcc and release mode for find* --- simdpp/algorithm/find.h | 23 ++++++++++++----------- simdpp/algorithm/find_if.h | 19 ++++++++++--------- simdpp/algorithm/find_if_not.h | 17 +++++++++-------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/simdpp/algorithm/find.h b/simdpp/algorithm/find.h index 38ada3e0..8023c882 100644 --- a/simdpp/algorithm/find.h +++ b/simdpp/algorithm/find.h @@ -26,9 +26,9 @@ namespace simdpp { struct UnaryPredicateEqualValue { public: - UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(splat(val)) {} + using simd_mask_T = typename typetraits::simd_mask_type; + using simd_type_T = typename typetraits::simd_type; bool operator()(T a) const { return a == m_val; } simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } @@ -43,8 +43,8 @@ namespace simdpp { if (!last) throw 
std::runtime_error("find - null ptr last."); #endif - using simd_type_T = typename simdpp::typetraits::simd_type; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T =typename typetraits::simd_mask_type; if (first == last) return last; @@ -52,7 +52,7 @@ namespace simdpp { const auto simd_size = simd_type_T::base_length; const auto size = std::distance(first, last); //note enforce that input is aligned when we start the main simd loop - const auto range = simdpp::helper_input_range(first, last); + const auto range = helper_input_range(first, last); const auto size_prologue_loop = range.first; const auto size_simd_loop = range.second; @@ -65,14 +65,15 @@ namespace simdpp { auto i = size_prologue_loop; //workaraund not test_bits_any for mask type - const simd_type_T on = simdpp::splat(T(1)); - const simd_type_T off = simdpp::splat(T(0)); + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); const auto pred = UnaryPredicateEqualValue(val); for (; i < size_simd_loop; i += simd_size) { //TR why can't test_bits_any not available for mask? - const auto res = simdpp::blend(on, off, pred(simdpp::load(lastprologue))); - if (simdpp::test_bits_any(res)) //match extract exact position + const simd_mask_T mask=pred(load(lastprologue)); + const auto res = blend(on, off,mask); + if (test_bits_any(res)) //match extract exact position { return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? 
} @@ -85,4 +86,4 @@ namespace simdpp { } // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp -#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H \ No newline at end of file +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H diff --git a/simdpp/algorithm/find_if.h b/simdpp/algorithm/find_if.h index c57e2fec..528a151c 100644 --- a/simdpp/algorithm/find_if.h +++ b/simdpp/algorithm/find_if.h @@ -29,16 +29,16 @@ namespace simdpp { if (!last) throw std::runtime_error("find_if - null ptr last."); #endif - using simd_type_T = typename simdpp::typetraits::simd_type; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename typetraits::simd_mask_type; + if (first == last) return last; //define loopcounter const auto simd_size = simd_type_T::base_length; const auto size = std::distance(first, last); //note enforce that input is aligned when we start the main simd loop - const auto range = simdpp::helper_input_range(first, last); + const auto range = helper_input_range(first, last); const auto size_prologue_loop = range.first; const auto size_simd_loop = range.second; @@ -51,13 +51,14 @@ namespace simdpp { auto i = size_prologue_loop; //workaraund not test_bits_any for mask type - const simd_type_T on = simdpp::splat(T(1)); - const simd_type_T off = simdpp::splat(T(0)); + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); for (; i < size_simd_loop; i += simd_size) { //TR why can't test_bits_any not available for mask? - const auto res = simdpp::blend(on, off, pred(simdpp::load(lastprologue))); - if (simdpp::test_bits_any(res)) //match extract exact position + const simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off,mask); + if (test_bits_any(res)) //match extract exact position { return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? 
} @@ -70,4 +71,4 @@ namespace simdpp { } // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp -#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H \ No newline at end of file +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H diff --git a/simdpp/algorithm/find_if_not.h b/simdpp/algorithm/find_if_not.h index 5982356c..4b854c7b 100644 --- a/simdpp/algorithm/find_if_not.h +++ b/simdpp/algorithm/find_if_not.h @@ -29,8 +29,8 @@ namespace simdpp { if (!last) throw std::runtime_error("find_if - null ptr last."); #endif - using simd_type_T = typename simdpp::typetraits::simd_type; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename typetraits::simd_mask_type; if (first == last) return last; @@ -38,7 +38,7 @@ namespace simdpp { const auto simd_size = simd_type_T::base_length; const auto size = std::distance(first, last); //note enforce that input is aligned when we start the main simd loop - const auto range = simdpp::helper_input_range(first, last); + const auto range = helper_input_range(first, last); const auto size_prologue_loop = range.first; const auto size_simd_loop = range.second; @@ -51,13 +51,14 @@ namespace simdpp { auto i = size_prologue_loop; //workaraund not reduce_and for mask type - const simd_type_T on = simdpp::splat(T(1)); - const simd_type_T off = simdpp::splat(T(0)); + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); for (; i < size_simd_loop; i += simd_size) { //TR why can't reduce_and not available for mask? - const auto res = simdpp::blend(on, off, pred(simdpp::load(lastprologue))); - if (!simdpp::reduce_and(res)) //match extract exact position + simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off, mask); + if (!reduce_and(res)) //match extract exact position { return std::find_if_not(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? 
} @@ -70,4 +71,4 @@ namespace simdpp { } // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp -#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H \ No newline at end of file +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H From b0735f510bf232c32bb505f8d0aa2eed66e1d3f7 Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Tue, 6 Mar 2018 05:40:03 +0100 Subject: [PATCH 09/23] issue #107 add max_element and min_element --- doc/wiki/Main_Page.mwiki | 2 + doc/wiki/algorithm/max.mwiki | 2 + doc/wiki/algorithm/max_element.mwiki | 31 +++++++++ doc/wiki/algorithm/min.mwiki | 2 + doc/wiki/algorithm/min_element.mwiki | 31 +++++++++ simdpp/algorithm/max_element.h | 41 ++++++++++++ simdpp/algorithm/min_element.h | 42 ++++++++++++ simdpp/simd.h | 2 + test/CMakeLists.txt | 2 + test/insn/max_element.cc | 96 +++++++++++++++++++++++++++ test/insn/min_element.cc | 97 ++++++++++++++++++++++++++++ test/insn/tests.cc | 7 +- test/insn/tests.h | 2 + 13 files changed, 355 insertions(+), 2 deletions(-) create mode 100644 doc/wiki/algorithm/max_element.mwiki create mode 100644 doc/wiki/algorithm/min_element.mwiki create mode 100644 simdpp/algorithm/max_element.h create mode 100644 simdpp/algorithm/min_element.h create mode 100644 test/insn/max_element.cc create mode 100644 test/insn/min_element.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index e8177f0e..1e3347de 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -182,7 +182,9 @@ the compiler will generate. {{ltt|algorithm/find_if}}
{{ltt|algorithm/find_if_not}}
{{ltt|algorithm/max}}
+{{ltt|algorithm/max_element}}
{{ltt|algorithm/min}}
+{{ltt|algorithm/min_element}}
{{ltt|algorithm/transform}}
{{ltt|algorithm/reduce}}
diff --git a/doc/wiki/algorithm/max.mwiki b/doc/wiki/algorithm/max.mwiki index 512a1fc8..dfd46ebf 100644 --- a/doc/wiki/algorithm/max.mwiki +++ b/doc/wiki/algorithm/max.mwiki @@ -26,4 +26,6 @@ template ===See also=== {{dsc begin}} {{dsc inc | algorithm/dsc min}} +{{dsc inc | algorithm/dsc max_element}} +{{dsc inc | algorithm/dsc min_element}} {{dsc end}} diff --git a/doc/wiki/algorithm/max_element.mwiki b/doc/wiki/algorithm/max_element.mwiki new file mode 100644 index 00000000..3113fccd --- /dev/null +++ b/doc/wiki/algorithm/max_element.mwiki @@ -0,0 +1,31 @@ +{{simdpp/title|max_element}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template<typename T> + T const * max_element(T const* first, T const* last); +}} +{{dcl | num=2 | + template<typename T, typename Compare> + T const * max_element(T const* first, T const* last, Compare comp); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|max_element}} returns the address of the element with the largest value in the half-open range [first, last), using comp to compare elements (if no comp is given, it simply returns the address of the maximum). Returns last if the range is empty.
+ +===Parameters=== +{{par begin}} +{{par | first, last | the range of processed elements}} +{{par | comp | binary comparison operator}} +{{par end}} + +===Return value=== +(address of the max) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc max}} +{{dsc inc | algorithm/dsc min_element}} +{{dsc inc | algorithm/dsc min}} +{{dsc end}} diff --git a/doc/wiki/algorithm/min.mwiki b/doc/wiki/algorithm/min.mwiki index 19fc0fac..40db99c3 100644 --- a/doc/wiki/algorithm/min.mwiki +++ b/doc/wiki/algorithm/min.mwiki @@ -26,4 +26,6 @@ template ===See also=== {{dsc begin}} {{dsc inc | algorithm/dsc max}} +{{dsc inc | algorithm/dsc max_element}} +{{dsc inc | algorithm/dsc min_element}} {{dsc end}} \ No newline at end of file diff --git a/doc/wiki/algorithm/min_element.mwiki b/doc/wiki/algorithm/min_element.mwiki new file mode 100644 index 00000000..d72fa233 --- /dev/null +++ b/doc/wiki/algorithm/min_element.mwiki @@ -0,0 +1,31 @@ +{{simdpp/title|min_element}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template<typename T> + T const * min_element(T const* first, T const* last); +}} +{{dcl | num=2 | + template<typename T, typename Compare> + T const * min_element(T const* first, T const* last, Compare comp); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|min_element}} returns the address of the element with the lowest value in the half-open range [first, last), using comp to compare elements (if no comp is given, it simply returns the address of the minimum). Returns last if the range is empty.
+ +===Parameters=== +{{par begin}} +{{par | first, last | the range of processed elements}} +{{par | comp | binary comparison operator}} +{{par end}} + +===Return value=== +(address of the min) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc max}} +{{dsc inc | algorithm/dsc max_element}} +{{dsc inc | algorithm/dsc min}} +{{dsc end}} diff --git a/simdpp/algorithm/max_element.h b/simdpp/algorithm/max_element.h new file mode 100644 index 00000000..bb861a25 --- /dev/null +++ b/simdpp/algorithm/max_element.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_MAX_ELEMENT_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_MAX_ELEMENT_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //numeric_limits +#include //max +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + T const * max_element(T const* first, T const* last, Compare comp) + { + if (first == last) return last; + return find(first, last, max(first, last, comp)); + } + + template + T const * max_element(T const* first, T const* last) + { + if (first == last) return last; + return find(first, last, max(first, last)); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_MAX_ELEMENT_H diff --git a/simdpp/algorithm/min_element.h b/simdpp/algorithm/min_element.h new file mode 100644 index 00000000..c17ce343 --- /dev/null +++ b/simdpp/algorithm/min_element.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_MIN_ELEMENT_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_MIN_ELEMENT_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include //numeric_limits +#include //min +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + T const * min_element(T const* first, T const* last, Compare comp) + { + if (first == last) return last; + return find(first, last, min(first, last, comp)); + } + + template + T const * min_element(T const* first, T const* last) + { + if (first == last) return last; + return find(first, last, min(first, last)); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_MIN_ELEMENT_H diff --git a/simdpp/simd.h b/simdpp/simd.h index 8e367ea2..3b389642 100644 --- a/simdpp/simd.h +++ b/simdpp/simd.h @@ -204,7 +204,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4731e371..ef8128c1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -74,9 +74,11 @@ set(TEST_INSN_ARCH_SOURCES insn/math_int.cc insn/math_shift.cc insn/max.cc + insn/max_element.cc insn/memory_load.cc insn/memory_store.cc insn/min.cc + insn/min_element.cc insn/shuffle.cc insn/shuffle_bytes.cc insn/permute_generic.cc diff --git a/test/insn/max_element.cc b/test/insn/max_element.cc new file mode 100644 index 00000000..8adc4d0e --- /dev/null +++ b/test/insn/max_element.cc @@ -0,0 +1,96 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct binary_cmp_greater + { + public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a, T b) { return a > b; } + + simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } + }; + + template + void test_max_element_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + } + {//test max with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size()), cmpOPGreater); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size()), 
cmpOPGreater); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size()), cmpOPGreater); } + } + + } + + void test_max_element(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("max_element"); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/min_element.cc b/test/insn/min_element.cc new file mode 100644 index 00000000..abebc019 --- /dev/null +++ b/test/insn/min_element.cc @@ -0,0 +1,97 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct binary_cmp_greater + { + public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + SIMDPP_INL bool operator()(T a, T b) { return a > b; } + + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } + }; + + template + void test_min_element_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + } + {//test max with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), 
cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + } + + } + + void test_min_element(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("min_element"); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/tests.cc b/test/insn/tests.cc index bb8e73f8..ff165826 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -73,8 +73,6 @@ namespace SIMDPP_ARCH_NAMESPACE { test_for_each(res, tr); //algorithm - test_transform(res, tr); - test_reduce(res, tr); test_copy(res,tr); test_copy_n(res,tr); test_fill(res,tr); @@ -82,7 +80,12 @@ namespace SIMDPP_ARCH_NAMESPACE { test_find_if(res, tr); test_find_if_not(res, tr); test_max(res,tr); + test_max_element(res, tr); test_min(res,tr); + test_min_element(res, tr); + test_reduce(res, tr); + test_transform(res, tr); + } } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/tests.h b/test/insn/tests.h index 3658f9ac..a71d1b03 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -33,9 +33,11 @@ void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& o void test_math_int(TestResults& res); void test_math_shift(TestResults& res); void test_max(TestResults& res, TestReporter& tr); + void test_max_element(TestResults& res, TestReporter& tr); void test_memory_load(TestResults& res, TestReporter& tr); void test_memory_store(TestResults& res, TestReporter& tr); void test_min(TestResults& 
res, TestReporter& tr); + void test_min_element(TestResults& res, TestReporter& tr); void test_set(TestResults& res); void test_shuffle(TestResults& res); void test_shuffle_bytes(TestResults& res, TestReporter& tr); From 0025a8ffcc9e7b0b31c26d574b7004a6a85a265a Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Tue, 6 Mar 2018 05:57:30 +0100 Subject: [PATCH 10/23] issue #107 gcc compil/warning fix --- simdpp/algorithm/find.h | 1 - simdpp/algorithm/find_if.h | 1 - simdpp/algorithm/max.h | 2 +- simdpp/algorithm/min.h | 2 +- simdpp/algorithm/reduce.h | 1 - test/insn/max_element.cc | 9 +++++---- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/simdpp/algorithm/find.h b/simdpp/algorithm/find.h index 8023c882..c9fa597e 100644 --- a/simdpp/algorithm/find.h +++ b/simdpp/algorithm/find.h @@ -50,7 +50,6 @@ namespace simdpp { //define loopcounter const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); //note enforce that input is aligned when we start the main simd loop const auto range = helper_input_range(first, last); const auto size_prologue_loop = range.first; diff --git a/simdpp/algorithm/find_if.h b/simdpp/algorithm/find_if.h index 528a151c..e1bd5874 100644 --- a/simdpp/algorithm/find_if.h +++ b/simdpp/algorithm/find_if.h @@ -36,7 +36,6 @@ namespace simdpp { //define loopcounter const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); //note enforce that input is aligned when we start the main simd loop const auto range = helper_input_range(first, last); const auto size_prologue_loop = range.first; diff --git a/simdpp/algorithm/max.h b/simdpp/algorithm/max.h index 9a3dad4f..9547583b 100644 --- a/simdpp/algorithm/max.h +++ b/simdpp/algorithm/max.h @@ -34,7 +34,7 @@ namespace simdpp { throw std::runtime_error("max - null ptr last."); #endif using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_mask_T = 
typename typetraits::simd_mask_type; if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::max() : std::numeric_limits::lowest(); //stolen from boost::simd diff --git a/simdpp/algorithm/min.h b/simdpp/algorithm/min.h index edfaf7f5..18920b9b 100644 --- a/simdpp/algorithm/min.h +++ b/simdpp/algorithm/min.h @@ -34,7 +34,7 @@ namespace simdpp { throw std::runtime_error("min - null ptr last."); #endif using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_mask_T = typename typetraits::simd_mask_type; if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::lowest() : std::numeric_limits::max(); //stolen from boost::simd diff --git a/simdpp/algorithm/reduce.h b/simdpp/algorithm/reduce.h index c3d909cf..d729800c 100644 --- a/simdpp/algorithm/reduce.h +++ b/simdpp/algorithm/reduce.h @@ -33,7 +33,6 @@ namespace simdpp { throw std::runtime_error("reduce - null ptr last."); #endif using simd_type_T = typename typetraits::simd_type; - const auto alignment = typetraits::alignment; simd_type_T accusimd = splat((T)0); diff --git a/test/insn/max_element.cc b/test/insn/max_element.cc index 8adc4d0e..ec444c33 100644 --- a/test/insn/max_element.cc +++ b/test/insn/max_element.cc @@ -60,21 +60,22 @@ namespace SIMDPP_ARCH_NAMESPACE { vector_aligned_t ivect(5); std::iota(begin(ivect), end(ivect), (T)1); ivect[0] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size()), cmpOPGreater); + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); } { //test epilogue vector_aligned_t ivect(100); std::iota(begin(ivect), end(ivect), (T)1); ivect[99] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size()), cmpOPGreater); + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); } { //test main loop and epilogue on aligned vector vector_aligned_t 
ivect(100); std::iota(begin(ivect), end(ivect), (T)1); ivect[50] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size()), cmpOPGreater); } + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } } - + } void test_max_element(TestResults& res, TestReporter& tr) From ae4802571def1f1ba3d858673ad3ad15cd409ae0 Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Wed, 7 Mar 2018 05:27:00 +0100 Subject: [PATCH 11/23] issue #107 add count, count_if --- doc/wiki/Main_Page.mwiki | 2 + doc/wiki/algorithm/count.mwiki | 27 ++++++++++++ doc/wiki/algorithm/count_if.mwiki | 28 ++++++++++++ simdpp/algorithm/count.h | 70 ++++++++++++++++++++++++++++++ simdpp/algorithm/count_if.h | 66 ++++++++++++++++++++++++++++ simdpp/simd.h | 2 + test/CMakeLists.txt | 8 ++-- test/insn/count.cc | 47 ++++++++++++++++++++ test/insn/count_if.cc | 71 +++++++++++++++++++++++++++++++ test/insn/tests.cc | 2 + test/insn/tests.h | 2 + 11 files changed, 322 insertions(+), 3 deletions(-) create mode 100644 doc/wiki/algorithm/count.mwiki create mode 100644 doc/wiki/algorithm/count_if.mwiki create mode 100644 simdpp/algorithm/count.h create mode 100644 simdpp/algorithm/count_if.h create mode 100644 test/insn/count.cc create mode 100644 test/insn/count_if.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index 1e3347de..cbd80a99 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -177,6 +177,8 @@ the compiler will generate.
{{ltt|algorithm/copy}}
{{ltt|algorithm/copy_n}}
+{{ltt|algorithm/count}}
+{{ltt|algorithm/count_if}}
{{ltt|algorithm/fill}}
{{ltt|algorithm/find}}
{{ltt|algorithm/find_if}}
diff --git a/doc/wiki/algorithm/count.mwiki b/doc/wiki/algorithm/count.mwiki new file mode 100644 index 00000000..0e9eb1e9 --- /dev/null +++ b/doc/wiki/algorithm/count.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|count}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template<typename T, typename U> + typename std::iterator_traits<const T*>::difference_type + count(T const* first, T const* last, U val); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|count}} counts the elements that are equal to {{tt|val}} in the range defined by {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | val | the value to search for}} +{{par end}} + +===Return value=== +number of elements satisfying the condition. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc count_if}} +{{dsc end}} diff --git a/doc/wiki/algorithm/count_if.mwiki b/doc/wiki/algorithm/count_if.mwiki new file mode 100644 index 00000000..592b7032 --- /dev/null +++ b/doc/wiki/algorithm/count_if.mwiki @@ -0,0 +1,28 @@ +{{simdpp/title|count_if}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template<typename T, typename UnaryPredicate> + typename std::iterator_traits<const T*>::difference_type + count_if(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|count_if}} counts the elements that satisfy the predicate in the range defined by {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate which returns {{tt|true}} for the required elements. }} +{{par end}} + +===Return value=== +number of elements satisfying the condition. + + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc count}} +{{dsc end}} diff --git a/simdpp/algorithm/count.h b/simdpp/algorithm/count.h new file mode 100644 index 00000000..ea78f002 --- /dev/null +++ b/simdpp/algorithm/count.h @@ -0,0 +1,70 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + typename std::iterator_traits::difference_type + count(T const* first, T const* last, U val) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("count - null ptr first."); + if (!last) + throw std::runtime_error("count - null ptr last."); +#endif + + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename typetraits::simd_mask_type; + using return_type = typename std::iterator_traits::difference_type; + if (first == last) return (return_type)0; + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + return_type res = std::count(first, lastprologue, val); + + //simd loop + auto i = size_prologue_loop; + + //workaraund not reduce_add for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + const simd_type_T valsimd = splat(U(val)); + + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(lastprologue); + const simd_mask_T mask = cmp_eq(el, valsimd); + const auto rescurrentsimd = blend(on, off, mask); + res += (return_type)reduce_add(rescurrentsimd); + lastprologue += simd_size; + } + res += std::count(lastprologue, last, val); + return res; + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_H diff --git a/simdpp/algorithm/count_if.h 
b/simdpp/algorithm/count_if.h new file mode 100644 index 00000000..d19aff26 --- /dev/null +++ b/simdpp/algorithm/count_if.h @@ -0,0 +1,66 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_IF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_IF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + typename std::iterator_traits::difference_type + count_if(T const* first, T const* last, UnaryPredicate pred) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("count_if - null ptr first."); + if (!last) + throw std::runtime_error("count_if - null ptr last."); +#endif + + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename typetraits::simd_mask_type; + using return_type = typename std::iterator_traits::difference_type; + if (first == last) return (return_type)0; + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + return_type res = std::count_if(first, lastprologue, pred); + + //simd loop + auto i = size_prologue_loop; + + //workaraund not reduce_add for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(lastprologue); + const simd_mask_T mask = pred(el); + const auto rescurrentsimd = blend(on, off, mask); + res += (return_type)reduce_add(rescurrentsimd); + lastprologue += simd_size; + } + res += 
std::count_if(lastprologue, last, pred); + return res; + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_IF_H diff --git a/simdpp/simd.h b/simdpp/simd.h index 3b389642..a918b167 100644 --- a/simdpp/simd.h +++ b/simdpp/simd.h @@ -199,6 +199,8 @@ //algorithm #include #include +#include +#include #include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ef8128c1..481a012e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -65,16 +65,18 @@ set(TEST_INSN_ARCH_SOURCES insn/convert.cc insn/copy.cc insn/copy_n.cc + insn/count.cc + insn/count_if.cc insn/for_each.cc insn/fill.cc insn/find.cc - insn/find_if.cc - insn/find_if_not.cc + insn/find_if.cc + insn/find_if_not.cc insn/math_fp.cc insn/math_int.cc insn/math_shift.cc insn/max.cc - insn/max_element.cc + insn/max_element.cc insn/memory_load.cc insn/memory_store.cc insn/min.cc diff --git a/test/insn/count.cc b/test/insn/count.cc new file mode 100644 index 00000000..9ea7261a --- /dev/null +++ b/test/insn/count.cc @@ -0,0 +1,47 @@ +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + void test_count_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + auto res = count(ivect.data(), ivect.data() + ivect.size(), (T)42); + auto resstd = std::count(begin(ivect), end(ivect), (T)42); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)42); + ivect[25] = (T)0; + ivect[49] = (T)0; + auto res = count(ivect.data(), ivect.data() + ivect.size(), (T)42); + auto resstd = std::count(begin(ivect), end(ivect), (T)42); + TEST_EQUAL(tr, res, resstd); + } + } + + void test_count(TestResults& res, TestReporter& tr) + { + 
using namespace simdpp; + TestResultsSet& ts = res.new_results_set("count"); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/count_if.cc b/test/insn/count_if.cc new file mode 100644 index 00000000..8b310b8c --- /dev/null +++ b/test/insn/count_if.cc @@ -0,0 +1,71 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct UnaryPredicateEqualValue + { + public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; + }; + + template + void test_count_if_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + const auto pred = UnaryPredicateEqualValue((T)42); + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + auto res = count_if(ivect.data(), ivect.data() + ivect.size(), pred); + auto resstd = std::count_if(begin(ivect), end(ivect), pred); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)42); + ivect[25] = (T)0; + ivect[49] = (T)0; + auto res 
= count_if(ivect.data(), ivect.data() + ivect.size(), pred); + auto resstd = std::count_if(begin(ivect), end(ivect), pred); + TEST_EQUAL(tr, res, resstd); + } + } + + void test_count_if(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("count_if"); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/tests.cc b/test/insn/tests.cc index ff165826..31bbab79 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -75,6 +75,8 @@ namespace SIMDPP_ARCH_NAMESPACE { //algorithm test_copy(res,tr); test_copy_n(res,tr); + test_count(res,tr); + test_count_if(res,tr); test_fill(res,tr); test_find(res, tr); test_find_if(res, tr); diff --git a/test/insn/tests.h b/test/insn/tests.h index a71d1b03..05808efc 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -24,6 +24,8 @@ void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& o void test_construct(TestResults& res); void test_copy(TestResults& res, TestReporter& tr); void test_copy_n(TestResults& res, TestReporter& tr); + void test_count(TestResults& res, TestReporter& tr); + void test_count_if(TestResults& res, TestReporter& tr); void test_for_each(TestResults& res, TestReporter& tr); void test_fill(TestResults& res, TestReporter& tr); void test_find(TestResults& res, TestReporter& tr); From dc33d00490f544707ff1b4711ed6faa1f1989247 Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Wed, 7 Mar 2018 13:49:39 +0100 Subject: [PATCH 12/23] issue #107 add all_of, any_of, none_of --- doc/wiki/Main_Page.mwiki | 3 ++ doc/wiki/algorithm/all_of.mwiki | 27 ++++++++++ doc/wiki/algorithm/any_of.mwiki | 27 ++++++++++ 
doc/wiki/algorithm/none_of.mwiki | 27 ++++++++++ simdpp/algorithm/all_of.h | 69 +++++++++++++++++++++++++ simdpp/algorithm/any_of.h | 68 +++++++++++++++++++++++++ simdpp/algorithm/none_of.h | 35 +++++++++++++ simdpp/simd.h | 3 ++ test/CMakeLists.txt | 3 ++ test/insn/all_of.cc | 86 ++++++++++++++++++++++++++++++++ test/insn/any_of.cc | 73 +++++++++++++++++++++++++++ test/insn/none_of.cc | 74 +++++++++++++++++++++++++++ test/insn/tests.cc | 3 ++ test/insn/tests.h | 3 ++ 14 files changed, 501 insertions(+) create mode 100644 doc/wiki/algorithm/all_of.mwiki create mode 100644 doc/wiki/algorithm/any_of.mwiki create mode 100644 doc/wiki/algorithm/none_of.mwiki create mode 100644 simdpp/algorithm/all_of.h create mode 100644 simdpp/algorithm/any_of.h create mode 100644 simdpp/algorithm/none_of.h create mode 100644 test/insn/all_of.cc create mode 100644 test/insn/any_of.cc create mode 100644 test/insn/none_of.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index cbd80a99..64390c3c 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -175,6 +175,8 @@ the compiler will generate. '''[[Algorithm | STL like algorithm]]'''
+{{ltt|algorithm/any_of}}
+{{ltt|algorithm/all_of}}
{{ltt|algorithm/copy}}
{{ltt|algorithm/copy_n}}
{{ltt|algorithm/count}}
@@ -187,6 +189,7 @@ the compiler will generate. {{ltt|algorithm/max_element}}
{{ltt|algorithm/min}}
{{ltt|algorithm/min_element}}
+{{ltt|algorithm/none_of}}
{{ltt|algorithm/transform}}
{{ltt|algorithm/reduce}}
diff --git a/doc/wiki/algorithm/all_of.mwiki b/doc/wiki/algorithm/all_of.mwiki new file mode 100644 index 00000000..f47536a4 --- /dev/null +++ b/doc/wiki/algorithm/all_of.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|all_of}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template<typename T, typename UnaryPredicate> + bool all_of(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|all_of}} checks if unary predicate {{tt|pred}} returns true for {{tt|all}} elements in the range {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate}} +{{par end}} + +===Return value=== +{{tt|true}} if unary predicate returns {{tt|true}} for all elements in the range, {{tt|false}} otherwise. Returns {{tt|true}} if the range is empty. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc none_of}} +{{dsc inc | algorithm/dsc any_of}} +{{dsc end}} diff --git a/doc/wiki/algorithm/any_of.mwiki b/doc/wiki/algorithm/any_of.mwiki new file mode 100644 index 00000000..678837c6 --- /dev/null +++ b/doc/wiki/algorithm/any_of.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|any_of}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template<typename T, typename UnaryPredicate> + bool any_of(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|any_of}} checks if unary predicate {{tt|pred}} returns true for {{tt|at least one}} element in the range {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate}} +{{par end}} + +===Return value=== +{{tt|true}} if unary predicate returns {{tt|true}} for at least one element in the range, {{tt|false}} otherwise. Returns {{tt|false}} if the range is empty. 
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc none_of}} +{{dsc inc | algorithm/dsc all_of}} +{{dsc end}} diff --git a/doc/wiki/algorithm/none_of.mwiki b/doc/wiki/algorithm/none_of.mwiki new file mode 100644 index 00000000..f49df565 --- /dev/null +++ b/doc/wiki/algorithm/none_of.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|none_of}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template<typename T, typename UnaryPredicate> + bool none_of(T const* first, T const* last, UnaryPredicate pred); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|none_of}} checks if unary predicate {{tt|pred}} returns true for {{tt|no}} elements in the range {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | pred | unary predicate}} +{{par end}} + +===Return value=== +{{tt|true}} if unary predicate returns {{tt|true}} for no elements in the range, {{tt|false}} otherwise. Returns {{tt|true}} if the range is empty. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc all_of}} +{{dsc inc | algorithm/dsc any_of}} +{{dsc end}} diff --git a/simdpp/algorithm/all_of.h b/simdpp/algorithm/all_of.h new file mode 100644 index 00000000..5f53fe05 --- /dev/null +++ b/simdpp/algorithm/all_of.h @@ -0,0 +1,69 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_ALL_OF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_ALL_OF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + bool all_of(T const* first, T const* last, UnaryPredicate pred) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("all_of - null ptr first."); + if (!last) + throw std::runtime_error("all_of - null ptr last."); +#endif + + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename typetraits::simd_mask_type; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + if(!std::all_of(first, lastprologue, pred)) return false; + + //simd loop + auto i = size_prologue_loop; + //workaraund not reduce_add for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off, mask); + + if (!reduce_and(res)) + { + return false; + } + lastprologue += simd_size; + } + if(!std::all_of(lastprologue,last, pred)) return false; + return true; + } + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_ALL_OF_H diff --git a/simdpp/algorithm/any_of.h b/simdpp/algorithm/any_of.h new file mode 100644 index 00000000..30ee5194 --- /dev/null +++ b/simdpp/algorithm/any_of.h @@ -0,0 +1,68 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost 
Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_ANY_OF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_ANY_OF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + bool any_of(T const* first, T const* last, UnaryPredicate pred) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("any_of - null ptr first."); + if (!last) + throw std::runtime_error("any_of - null ptr last."); +#endif + + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename typetraits::simd_mask_type; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto lastprologue = first + size_prologue_loop; + if(std::any_of(first, lastprologue, pred)) return true; + + //simd loop + auto i = size_prologue_loop; + //workaraund not test_bits_any for mask type + const simd_type_T on = splat(T(1)); //TODO factorize + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + simd_mask_T mask = pred(load(lastprologue)); //TODO factorize + const auto res = blend(on, off, mask); + if (test_bits_any(res)) + { + return true; + } + lastprologue += simd_size; + } + return std::any_of(lastprologue,last, pred); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_ANY_OF_H diff --git a/simdpp/algorithm/none_of.h b/simdpp/algorithm/none_of.h new file mode 100644 index 00000000..66dacbcd --- /dev/null +++ b/simdpp/algorithm/none_of.h @@ -0,0 +1,35 @@ +/* Copyright (C) 2018 Povilas 
Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_NONE_OF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_NONE_OF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + bool none_of(T const* first, T const* last, UnaryPredicate pred) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("none_of - null ptr first."); + if (!last) + throw std::runtime_error("none_of - null ptr last."); +#endif + return !any_of(first,last,pred); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_NONE_OF_H diff --git a/simdpp/simd.h b/simdpp/simd.h index a918b167..465039a8 100644 --- a/simdpp/simd.h +++ b/simdpp/simd.h @@ -197,6 +197,8 @@ #include //algorithm +#include +#include #include #include #include @@ -209,6 +211,7 @@ #include #include #include +#include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 481a012e..23237a60 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -58,6 +58,8 @@ set(TEST_INSN_HEADERS ) set(TEST_INSN_ARCH_SOURCES + insn/all_of.cc + insn/any_of.cc insn/bitwise.cc insn/blend.cc insn/compare.cc @@ -81,6 +83,7 @@ set(TEST_INSN_ARCH_SOURCES insn/memory_store.cc insn/min.cc insn/min_element.cc + insn/none_of.cc insn/shuffle.cc insn/shuffle_bytes.cc insn/permute_generic.cc diff --git a/test/insn/all_of.cc b/test/insn/all_of.cc new file mode 100644 index 00000000..11040fe4 --- /dev/null +++ b/test/insn/all_of.cc @@ -0,0 +1,86 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct UnaryPredicateEqualValue + { + public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; + }; + + + template + void test_all_of_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + + {//test with predicate + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + const auto predEqualFive = UnaryPredicateEqualValue((T)5); + { //test prologue + vector_t ivect = { (T)10,(T)10 }; + auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::all_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + { //test prologue + vector_t ivect = { (T)10,(T)10 }; + auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualFive); + auto resstd = std::all_of(begin(ivect), end(ivect), predEqualFive); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)10); + auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::all_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)10); + auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualFive); + auto resstd = std::all_of(begin(ivect), 
end(ivect), predEqualFive); + TEST_EQUAL(tr, res, resstd); + } + } + + } + + void test_all_of(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("all_of"); + //test_all_of_type(ts, tr); //FIXME + //test_all_of_type(ts, tr); //FIXME + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/any_of.cc b/test/insn/any_of.cc new file mode 100644 index 00000000..de7f2092 --- /dev/null +++ b/test/insn/any_of.cc @@ -0,0 +1,73 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct UnaryPredicateEqualValue + { + public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; + }; + + + template + void test_any_of_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + + {//test with predicate + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + { //test prologue + vector_t ivect = { (T)1,(T)10 }; + auto res = any_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::any_of(begin(ivect), end(ivect), predEqualTen); + 
TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)5); + auto res = any_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::any_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + } + + } + + void test_any_of(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("any_of"); + //test_any_of_type(ts, tr); //FIXME + //test_any_of_type(ts, tr); //FIXME + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/none_of.cc b/test/insn/none_of.cc new file mode 100644 index 00000000..b878a61e --- /dev/null +++ b/test/insn/none_of.cc @@ -0,0 +1,74 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct UnaryPredicateEqualValue + { + public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; + }; + + + template + void test_none_of_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + + {//test with predicate + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + { //test prologue + vector_t ivect = { (T)1,(T)2}; + auto res = none_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::none_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)5); + ivect[49]=(T)10; + auto res = none_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::none_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + } + + } + + void test_none_of(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("none_of"); + //test_none_of_type(ts, tr); //FIXME + //test_none_of_type(ts, tr); //FIXME + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + } + +} // namespace 
SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/tests.cc b/test/insn/tests.cc index 31bbab79..879f5656 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -73,6 +73,8 @@ namespace SIMDPP_ARCH_NAMESPACE { test_for_each(res, tr); //algorithm + test_all_of(res,tr); + test_any_of(res,tr); test_copy(res,tr); test_copy_n(res,tr); test_count(res,tr); @@ -85,6 +87,7 @@ namespace SIMDPP_ARCH_NAMESPACE { test_max_element(res, tr); test_min(res,tr); test_min_element(res, tr); + test_none_of(res,tr); test_reduce(res, tr); test_transform(res, tr); diff --git a/test/insn/tests.h b/test/insn/tests.h index 05808efc..339f195e 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -17,6 +17,8 @@ namespace SIMDPP_ARCH_NAMESPACE { void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& opts); + void test_all_of(TestResults& res, TestReporter& tr); + void test_any_of(TestResults& res, TestReporter& tr); void test_bitwise(TestResults& res, TestReporter& tr); void test_blend(TestResults& res); void test_compare(TestResults& res); @@ -40,6 +42,7 @@ void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& o void test_memory_store(TestResults& res, TestReporter& tr); void test_min(TestResults& res, TestReporter& tr); void test_min_element(TestResults& res, TestReporter& tr); + void test_none_of(TestResults& res, TestReporter& tr); void test_set(TestResults& res); void test_shuffle(TestResults& res); void test_shuffle_bytes(TestResults& res, TestReporter& tr); From d6a6bfa49ba0c3823e5439645b6f7185aa11f0b0 Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Thu, 8 Mar 2018 09:01:49 +0100 Subject: [PATCH 13/23] issue #107 add replace,replace_if --- doc/wiki/Main_Page.mwiki | 4 +- doc/wiki/algorithm/replace.mwiki | 27 ++++++++++ doc/wiki/algorithm/replace_if.mwiki | 27 ++++++++++ simdpp/algorithm/replace.h | 41 +++++++++++++++ simdpp/algorithm/replace_if.h | 41 +++++++++++++++ simdpp/simd.h | 5 +- test/CMakeLists.txt | 2 + test/insn/replace.cc | 62 
++++++++++++++++++++++ test/insn/replace_if.cc | 79 +++++++++++++++++++++++++++++ test/insn/tests.cc | 2 + test/insn/tests.h | 2 + 11 files changed, 290 insertions(+), 2 deletions(-) create mode 100644 doc/wiki/algorithm/replace.mwiki create mode 100644 doc/wiki/algorithm/replace_if.mwiki create mode 100644 simdpp/algorithm/replace.h create mode 100644 simdpp/algorithm/replace_if.h create mode 100644 test/insn/replace.cc create mode 100644 test/insn/replace_if.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index 64390c3c..660798ed 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -190,8 +190,10 @@ the compiler will generate. {{ltt|algorithm/min}}
{{ltt|algorithm/min_element}}
{{ltt|algorithm/none_of}}
-{{ltt|algorithm/transform}}
{{ltt|algorithm/reduce}}
+{{ltt|algorithm/replace}}
+{{ltt|algorithm/replace_if}}
+{{ltt|algorithm/transform}}
|- class="row rowbottom" diff --git a/doc/wiki/algorithm/replace.mwiki b/doc/wiki/algorithm/replace.mwiki new file mode 100644 index 00000000..412a9e4b --- /dev/null +++ b/doc/wiki/algorithm/replace.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|replace}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + void replace(T * first, T * last, T const & old_val, T const & new_val); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|replace}} Replaces all elements that are equal to {{tt|old_val}} with {{tt|new_val}} in the range defined by {{tt|[first, last)}}. + +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | old_val | the value of elements to replace}} +{{par | new_val | the value to use as replacement}} +{{par end}} + +===Return value=== +(None) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc replace_if}} +{{dsc end}} diff --git a/doc/wiki/algorithm/replace_if.mwiki b/doc/wiki/algorithm/replace_if.mwiki new file mode 100644 index 00000000..90c76fa9 --- /dev/null +++ b/doc/wiki/algorithm/replace_if.mwiki @@ -0,0 +1,27 @@ +{{simdpp/title|replace_if}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | + template + void replace_if(T* first, T* last, UnaryPredicate pred , const T& new_val); +}} + +{{dcl end}} +{{misc/navbar}} + +{{tt|replace_if}} Replaces all elements for which predicate {{tt|pred}} returns {{tt|true}} with {{tt|new_val}} in the range defined by {{tt|[first, last)}}. 
+ +===Parameters=== +{{par begin}} +{{par | first, last | the range of elements to examine}} +{{par | new_val | the value to use as replacement}} +{{par | pred | unary predicate which returns true if the element value should be replaced.}} +{{par end}} + +===Return value=== +(None) + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc replace}} +{{dsc end}} diff --git a/simdpp/algorithm/replace.h b/simdpp/algorithm/replace.h new file mode 100644 index 00000000..53643446 --- /dev/null +++ b/simdpp/algorithm/replace.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + void replace(T* first, T* last, T const & old_val, T const & new_val) + { + struct local_predicate + { + using simd_type_T = typename typetraits::simd_type; + local_predicate(const T & old_val, const T & new_val) : m_old_val(old_val), m_new_val(new_val),m_old_val_simd(splat(old_val)), m_new_val_simd(splat(new_val)) {} + + T operator()( const T& a) const { return a == m_old_val ? 
m_new_val : a;} + simd_type_T operator()(const simd_type_T& a) const { return blend(m_new_val_simd,a,cmp_eq(a,m_old_val_simd)); } + + T m_old_val, m_new_val; + simd_type_T m_old_val_simd, m_new_val_simd; + }; + + transform(first, last, first, local_predicate(old_val, new_val)); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_H diff --git a/simdpp/algorithm/replace_if.h b/simdpp/algorithm/replace_if.h new file mode 100644 index 00000000..435e006f --- /dev/null +++ b/simdpp/algorithm/replace_if.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_IF_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_IF_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + void replace_if(T* first, T* last, UnaryPredicate pred , const T& new_val) + { + struct local_predicate + { + using simd_type_T = typename typetraits::simd_type; + local_predicate(const UnaryPredicate& pred,const T & new_val) : m_new_val(new_val),m_new_val_simd(splat(new_val)),m_pred(pred) {} + + T operator()( const T& a) const { return m_pred(a) ? 
m_new_val : a;} + simd_type_T operator()(const simd_type_T& a) const { return blend(m_new_val_simd,a,m_pred(a)); } + + T m_new_val; + simd_type_T m_new_val_simd; + UnaryPredicate m_pred; + }; + + transform(first, last, first, local_predicate(pred, new_val)); + } + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_IF_H diff --git a/simdpp/simd.h b/simdpp/simd.h index 465039a8..0b824189 100644 --- a/simdpp/simd.h +++ b/simdpp/simd.h @@ -212,8 +212,11 @@ #include #include #include -#include #include +#include +#include +#include + /** @def SIMDPP_NO_DISPATCHER Disables internal dispatching functionality. If the internal dispathcher diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 23237a60..c2d3ce37 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -88,6 +88,8 @@ set(TEST_INSN_ARCH_SOURCES insn/shuffle_bytes.cc insn/permute_generic.cc insn/reduce.cc + insn/replace.cc + insn/replace_if.cc insn/shuffle_generic.cc insn/test_utils.cc insn/tests.cc diff --git a/test/insn/replace.cc b/test/insn/replace.cc new file mode 100644 index 00000000..880a5d98 --- /dev/null +++ b/test/insn/replace.cc @@ -0,0 +1,62 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + void test_replace_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t expected = { (T)0,(T)0 }; + + replace(ivect.data(),ivect.data()+ivect.size(),(T)42,(T)0 ); + + for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ivect[i], expected[i]); + } + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)0); + vector_aligned_t expected(50); + std::copy(begin(ivect),end(ivect),begin(expected)); + expected[39]=42; + replace(ivect.data(),ivect.data()+ivect.size(),(T)39,(T)42 ); + for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + TEST_EQUAL(tr, expected[i], ivect[i]); + } + } + + void test_replace(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("replace"); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/replace_if.cc b/test/insn/replace_if.cc new file mode 100644 index 00000000..96578160 --- /dev/null +++ b/test/insn/replace_if.cc @@ -0,0 +1,79 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + template + struct UnaryPredicateEqualValue + { + public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::typetraits::simd_mask_type; + using simd_type_T = typename simdpp::typetraits::simd_type; + + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + + T m_val; + simd_type_T m_val_simd; + }; + + template + void test_replace_if_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t expected = { (T)0,(T)0 }; + const auto pred = UnaryPredicateEqualValue((T)42); + replace_if(ivect.data(),ivect.data()+ivect.size(),pred,(T)0 ); + + for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, ivect[i], expected[i]); + } + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)0); + vector_aligned_t expected(50); + std::copy(begin(ivect),end(ivect),begin(expected)); + expected[39]=42; + const auto pred = UnaryPredicateEqualValue((T)39); + replace_if(ivect.data(),ivect.data()+ivect.size(),pred,(T)42 ); + for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + TEST_EQUAL(tr, expected[i], ivect[i]); + } + } + + void test_replace_if(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("replace_if"); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); 
+ test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/tests.cc b/test/insn/tests.cc index 879f5656..66cd90fb 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -88,6 +88,8 @@ namespace SIMDPP_ARCH_NAMESPACE { test_min(res,tr); test_min_element(res, tr); test_none_of(res,tr); + test_replace(res,tr); + test_replace_if(res,tr); test_reduce(res, tr); test_transform(res, tr); diff --git a/test/insn/tests.h b/test/insn/tests.h index 339f195e..739166ef 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -43,6 +43,8 @@ void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& o void test_min(TestResults& res, TestReporter& tr); void test_min_element(TestResults& res, TestReporter& tr); void test_none_of(TestResults& res, TestReporter& tr); + void test_replace(TestResults& res, TestReporter& tr); + void test_replace_if(TestResults& res, TestReporter& tr); void test_set(TestResults& res); void test_shuffle(TestResults& res); void test_shuffle_bytes(TestResults& res, TestReporter& tr); From f95aa05c86c04aeacb87923a38573db6a228337a Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Sat, 10 Mar 2018 06:12:05 +0100 Subject: [PATCH 14/23] issue #107 add equal and lexicographic_compare --- doc/wiki/Main_Page.mwiki | 2 + doc/wiki/algorithm/equal.mwiki | 35 +++++ .../algorithm/lexicographical_compare.mwiki | 34 +++++ simdpp/algorithm/equal.h | 99 +++++++++++++ simdpp/algorithm/lexicographical_compare.h | 131 ++++++++++++++++++ simdpp/simd.h | 2 + test/CMakeLists.txt | 2 + test/insn/equal.cc | 59 ++++++++ test/insn/lexicographical_compare.cc | 63 +++++++++ test/insn/tests.cc | 2 + test/insn/tests.h | 2 + 11 files changed, 431 insertions(+) create mode 100644 doc/wiki/algorithm/equal.mwiki create mode 100644 
doc/wiki/algorithm/lexicographical_compare.mwiki create mode 100644 simdpp/algorithm/equal.h create mode 100644 simdpp/algorithm/lexicographical_compare.h create mode 100644 test/insn/equal.cc create mode 100644 test/insn/lexicographical_compare.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index 660798ed..094a325c 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -181,10 +181,12 @@ the compiler will generate. {{ltt|algorithm/copy_n}}
{{ltt|algorithm/count}}
{{ltt|algorithm/count_if}}
+{{ltt|algorithm/equal}}
{{ltt|algorithm/fill}}
{{ltt|algorithm/find}}
{{ltt|algorithm/find_if}}
{{ltt|algorithm/find_if_not}}
+{{ltt|algorithm/lexicographical_compare}}
{{ltt|algorithm/max}}
{{ltt|algorithm/max_element}}
{{ltt|algorithm/min}}
diff --git a/doc/wiki/algorithm/equal.mwiki b/doc/wiki/algorithm/equal.mwiki new file mode 100644 index 00000000..b5ae22e7 --- /dev/null +++ b/doc/wiki/algorithm/equal.mwiki @@ -0,0 +1,35 @@ +{{simdpp/title|equal}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template + bool equal(const T* first1, const T* last1, const T* first2); +}} +{{dcl | num=2 | + template + bool equal(const T* first1, const T* last1, const T* first2,BinaryPredicate pred); + +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|equal}} Returns {{c|true}} if the range {{tt|[first1,last1)}} is equal to the range {{tt|[first2, first2 + (last1 - first1))}} according to {{tt|pred}}, and {{c|false}} otherwise. + +1) Returns {{c|true}} if the range {{tt|[first1,last1)}} is equal to the range {{tt|[first2, first2 + (last1 - first1))}}, and {{c|false}} otherwise. +2) The binary predicate {{tt|pred}} is applied to pairs of elements from two ranges: one defined by {{tt|[first1, last1)}} and the other beginning at {{tt|first2}}. + +===Parameters=== +{{par begin}} +{{par | first1, last1 | the first range of elements to examine}} +{{par | first2 | the beginning of the second range of elements to examine}} +{{par | pred | binary predicate which returns true if the elements should be treated as equal.}} +{{par end}} + +===Return value=== +If the elements in the two ranges are equal, returns {{c|true}}. Otherwise returns {{c|false}}. 
+ +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc find }} +{{dsc inc | algorithm/dsc lexicographical_compare }} +{{dsc end}} diff --git a/doc/wiki/algorithm/lexicographical_compare.mwiki b/doc/wiki/algorithm/lexicographical_compare.mwiki new file mode 100644 index 00000000..363f6e7e --- /dev/null +++ b/doc/wiki/algorithm/lexicographical_compare.mwiki @@ -0,0 +1,34 @@ +{{simdpp/title|lexicographical_compare}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template + bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2); +}} +{{dcl | num=2 | + + template + bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2,BinaryPredicate comp); +}} +{{dcl end}} +{{misc/navbar}} + +{{tt|lexicographical_compare}} Checks if the first range {{c|[first1, last1)}} is lexicographically ''less'' than the second range {{c|[first2, last2)}}. + +@1@ Elements are compared using {{tt|operator<}}. +@2@ Elements are compared using the given binary comparison function {{tt|comp}}. + +===Parameters=== +{{par begin}} +{{par | first1, last1 | the first range of elements to examine}} +{{par | first2, last2 | the second range of elements to examine}} +{{par | comp | binary comparison function which returns true if the first argument is ''less'' than the second}} +{{par end}} + +===Return value=== +{{c|true}} if the first range is lexicographically ''less'' than the second. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc equal }} +{{dsc end}} diff --git a/simdpp/algorithm/equal.h b/simdpp/algorithm/equal.h new file mode 100644 index 00000000..52e1e5b1 --- /dev/null +++ b/simdpp/algorithm/equal.h @@ -0,0 +1,99 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_EQUAL_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_EQUAL_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + bool equal(const T* first1, const T* last1, const T* first2,BinaryPredicate pred) + { + +#ifndef NDEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("equal - null ptr first1."); + if (!last1) + throw std::runtime_error("equal - null ptr last1."); + if (!first2) + throw std::runtime_error("equal - null ptr first2."); +#endif + using simd_type_T = typename typetraits::simd_type; + //using simd_mask_T = typename typetraits::simd_mask_type; + auto alignment = typetraits::alignment; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + //prologue + auto last1prologue = first1 + size_prologue_loop; + auto last2prologue = first2 + size_prologue_loop; + if(!std::equal(first1, last1prologue,first2,pred)) return false; + + auto i=size_prologue_loop; + //workaound no reduce_and for mask_type + const simd_type_T on=splat(T(1)); + const simd_type_T off=splat(T(0)); + //---main simd loop + if (detail::is_aligned(last2prologue, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element1 = load(last1prologue); + const simd_type_T element2 = load(last2prologue); + const simd_type_T res=blend(on,off,pred(element1,element2)); //workaound no reduce_and for mask_type + if(!reduce_and(res)) return false; + last1prologue += simd_size; + last2prologue += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + 
const simd_type_T element1 = load(last1prologue); + const simd_type_T element2 = load_u(last2prologue); + const simd_type_T res=blend(on,off,pred(element1,element2));//workaound no reduce_and for mask_type + if(!reduce_and(res)) return false; + last1prologue += simd_size; + last2prologue += simd_size; + } + } + + if(!std::equal(last1prologue, last1,last2prologue,pred)) return false; + return true; + } + + template + bool equal(const T* first1, const T* last1, const T* first2) + { + struct local_bynary_predicate_equal + { + using simd_type_T = typename typetraits::simd_type; + using simd_mask_T = typename typetraits::simd_mask_type; + bool operator()(const T& a0,const T& a1) {return a0==a1;} + simd_mask_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return cmp_eq(a0,a1);} + }; + return equal(first1,last1,first2,local_bynary_predicate_equal()); + } + + } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace simdpp + +#endif //LIBSIMDPP_SIMDPP_ALGORITHM_EQUAL_H diff --git a/simdpp/algorithm/lexicographical_compare.h b/simdpp/algorithm/lexicographical_compare.h new file mode 100644 index 00000000..4ac7aa02 --- /dev/null +++ b/simdpp/algorithm/lexicographical_compare.h @@ -0,0 +1,131 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_LEXICOGRAPHICAL8COMPARE_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_LEXICOGRAPHICAL8COMPARE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + + template + bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2,BinarayPredicate comp) + { +#ifndef NDEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("lexicographical_compare - null ptr first1."); + if (!last1) + throw std::runtime_error("lexicographical_compare - null ptr last1."); + if (!first2) + throw std::runtime_error("lexicographical_compare - null ptr first2."); + if (!last2) + throw std::runtime_error("lexicographical_compare - null ptr last2."); +#endif + + using simd_type_T = typename typetraits::simd_type; + + auto alignment = typetraits::alignment; + using difference_type_T = typename std::iterator_traits::difference_type; + difference_type_T d1 = std::distance(first1, last1); + difference_type_T d2 = std::distance(first2, last2); + bool shorter = d1 < d2; + auto last = shorter ? last1 : first1+d2; + auto size = shorter ? 
d1 : d2; + + //define loopcounter + const auto simd_size = simd_type_T::base_length; + + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first1, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i=0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + if (comp(*first1, *first2)) return true; + if (comp(*first2, *first1)) return false; + ++first1; + ++first2; + } + + //---main simd loop + if (detail::is_aligned(first2, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element1 = load(first1); + simd_type_T element2 = load(first2); + if(comp(element1, element2)) return true; + if(comp(element2, element1)) return false; + first1 += simd_size; + first2 += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element1 = load(first1); + simd_type_T element2 = load_u(first2); + if(comp(element1, element2)) return true; + if(comp(element2, element1)) return false; + first1 += simd_size; + first2 += simd_size; + } + } + //---epilogue + for (; i < size; ++i) + { + if (comp(*first1, *first2)) return true; + if (comp(*first2, *first1)) return false; + ++first1; + ++first2; + } + return shorter; + } + + template + bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2) + { + struct local_binary_predicate_less + { + using simd_type_T = typename typetraits::simd_type; + + local_binary_predicate_less():on(splat(T(1))),off(splat(T(0))) {} + + bool operator()(T a0,T a1) { return a0 #include #include +#include #include #include #include #include +#include #include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c2d3ce37..49bfe641 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -69,11 +69,13 @@ set(TEST_INSN_ARCH_SOURCES insn/copy_n.cc insn/count.cc insn/count_if.cc + insn/equal.cc insn/for_each.cc 
insn/fill.cc insn/find.cc insn/find_if.cc insn/find_if_not.cc + insn/lexicographical_compare.cc insn/math_fp.cc insn/math_int.cc insn/math_shift.cc diff --git a/test/insn/equal.cc b/test/insn/equal.cc new file mode 100644 index 00000000..5944c653 --- /dev/null +++ b/test/insn/equal.cc @@ -0,0 +1,59 @@ +/* Copyright (C) 2018 Povilas Kanapickas + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include + + +namespace SIMDPP_ARCH_NAMESPACE { + + template + void test_equal_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect1 = { (T)42,(T)42 }; + vector_t ivect2 = { (T)0,(T)0 }; + + auto res=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res,false); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect1(50); + std::iota(begin(ivect1),end(ivect1),(T)1); + vector_aligned_t ivect2(50); + std::copy(begin(ivect1),end(ivect1),begin(ivect2)); + auto res=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res,true); + ivect2[25]=0; + auto res2=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res2,false); + } + } + + void test_equal(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("copy"); + // test_equal_type(ts, tr); //FIXME + // test_equal_type(ts, tr); //FIXME + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + } + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git 
a/test/insn/lexicographical_compare.cc b/test/insn/lexicographical_compare.cc new file mode 100644 index 00000000..ecd50257 --- /dev/null +++ b/test/insn/lexicographical_compare.cc @@ -0,0 +1,63 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + + + template + void test_lexicograpical_compare_type(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_aligned_t ivect = {(T)0,(T)1}; + vector_aligned_t ivect2={(T)1,(T)2}; + auto res = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect2.data(),ivect2.data()+ivect2.size()); + auto resstd = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect2), end(ivect2)); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + vector_aligned_t ivect2(50); + std::iota(begin(ivect),end(ivect),(T)0); + std::iota(begin(ivect2),end(ivect2),(T)1); + auto res = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect2.data(),ivect2.data()+ivect2.size()); + auto resstd = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect2), end(ivect2)); + TEST_EQUAL(tr, res, resstd); + auto resinv = lexicographical_compare(ivect2.data(),ivect2.data()+ivect2.size(),ivect.data(),ivect.data()+ivect.size()); + auto resstdinv = std::lexicographical_compare(begin(ivect2), end(ivect2),begin(ivect), end(ivect)); + TEST_EQUAL(tr, resinv, resstdinv); + auto ressame = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect.data(),ivect.data()+ivect.size()); + auto resstdsame = std::lexicographical_compare(begin(ivect), 
end(ivect),begin(ivect), end(ivect)); + TEST_EQUAL(tr, ressame, resstdsame); + } + } + + void test_lexicographical_compare(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("lexicographical_compare"); + //test_lexicograpical_compare_type(ts, tr); //FIXME + //test_lexicograpical_compare_type(ts, tr); //FIXME + //test_lexicograpical_compare_type(ts, tr); //FIXME + //test_lexicograpical_compare_type(ts, tr); //FIXME + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + } + + } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/tests.cc b/test/insn/tests.cc index 66cd90fb..a6ea0d1a 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -79,10 +79,12 @@ namespace SIMDPP_ARCH_NAMESPACE { test_copy_n(res,tr); test_count(res,tr); test_count_if(res,tr); + test_equal(res,tr); test_fill(res,tr); test_find(res, tr); test_find_if(res, tr); test_find_if_not(res, tr); + test_lexicographical_compare(res,tr); test_max(res,tr); test_max_element(res, tr); test_min(res,tr); diff --git a/test/insn/tests.h b/test/insn/tests.h index 739166ef..0d7544c9 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -28,11 +28,13 @@ void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& o void test_copy_n(TestResults& res, TestReporter& tr); void test_count(TestResults& res, TestReporter& tr); void test_count_if(TestResults& res, TestReporter& tr); + void test_equal(TestResults& res, TestReporter& tr); void test_for_each(TestResults& res, TestReporter& tr); void test_fill(TestResults& res, TestReporter& tr); void test_find(TestResults& res, TestReporter& tr); void test_find_if(TestResults& res, TestReporter& tr); void test_find_if_not(TestResults& res, TestReporter& tr); + void 
test_lexicographical_compare(TestResults& res, TestReporter& tr); void test_math_fp(TestResults& res, const TestOptions& opts); void test_math_int(TestResults& res); void test_math_shift(TestResults& res); From b8b0b344b28f3a9e1a871ca017bac91bf89e4080 Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Sun, 11 Mar 2018 08:02:32 +0100 Subject: [PATCH 15/23] issue #107 add transform_reduce --- doc/wiki/Main_Page.mwiki | 1 + doc/wiki/algorithm/transform_reduce.mwiki | 42 ++++++ simdpp/algorithm/transform_reduce.h | 150 ++++++++++++++++++++++ simdpp/simd.h | 1 + test/CMakeLists.txt | 1 + test/insn/tests.cc | 1 + test/insn/tests.h | 1 + test/insn/transform_reduce.cc | 106 +++++++++++++++ 8 files changed, 303 insertions(+) create mode 100644 doc/wiki/algorithm/transform_reduce.mwiki create mode 100644 simdpp/algorithm/transform_reduce.h create mode 100644 test/insn/transform_reduce.cc diff --git a/doc/wiki/Main_Page.mwiki b/doc/wiki/Main_Page.mwiki index 094a325c..976cdbf2 100644 --- a/doc/wiki/Main_Page.mwiki +++ b/doc/wiki/Main_Page.mwiki @@ -196,6 +196,7 @@ the compiler will generate. {{ltt|algorithm/replace}}
{{ltt|algorithm/replace_if}}
{{ltt|algorithm/transform}}
+{{ltt|algorithm/transform_reduce}}
|- class="row rowbottom" diff --git a/doc/wiki/algorithm/transform_reduce.mwiki b/doc/wiki/algorithm/transform_reduce.mwiki new file mode 100644 index 00000000..f827f171 --- /dev/null +++ b/doc/wiki/algorithm/transform_reduce.mwiki @@ -0,0 +1,42 @@ +{{simdpp/title|transform_reduce}} +{{misc/navbar}} +{{dcl begin}} +{{dcl | num=1 | + template<typename T, typename BinaryOp, typename UnaryOp> + T transform_reduce( const T* first, const T* last, T init, BinaryOp binary_op,UnaryOp unary_op); +}} +{{dcl | num=2 | + template<typename T1, typename T2, typename U, typename BinaryOp1, typename BinaryOp2> + U transform_reduce(const T1* first1, const T1* last1, const T2* first2, U init, BinaryOp1 binary_op1, BinaryOp2 binary_op2); + +}} +{{dcl end}} +{{misc/navbar}} + +@1@ Applies {{tt|unary_op}} to each element in the range {{math|[first; last)}} and reduces the results (possibly permuted and aggregated in unspecified manner) along with the initial value {{tt|init}} over {{tt|binary_op}}. + +@2@ Applies {{tt|binary_op2}} to each pair of elements from the range {{c|[first1; last1)}} and the range starting at {{c|first2}} and reduces the results (possibly permuted and aggregated in unspecified manner) along with the initial value {{tt|init}} over {{tt|binary_op1}}. + +===Notes=== +The behavior is non-deterministic if {{tt|binary_op}}/{{tt|binary_op1}} is not associative or not commutative. + +The behavior is undefined if {{tt|unary_op}}, {{tt|binary_op}}, {{tt|binary_op1}}, or {{tt|binary_op2}} modifies any element. + +===Parameters=== +{{par begin}} +{{par | first1, last1 | the first range of elements to transform}} +{{par | first2 | the beginning of the second range of elements to transform}} +{{par | init | the initial value of the generalized sum}} +{{par | unary_op | unary {{concept|FunctionObject}} that will be applied to each element of the input range.
The return type must be acceptable as input to {{tt|binary_op}} }} +{{par | binary_op | binary {{concept|FunctionObject}} that will be applied in unspecified order to the results of {{tt|unary_op}}, the results of other {{tt|binary_op}} and {{tt|init}}.}} +{{par end}} + +===Return value=== +@1@ Generalized sum of {{tt|init}} and {{tt|unary_op(*first)}}, {{tt|unary_op(*(first+1))}}, ... {{tt|unary_op(*(last-1))}} over {{tt|binary_op}}. +@2@ Generalized sum of {{tt|init}} and {{tt|binary_op2(*first1,*first2)}}, {{tt|binary_op2(*(first1+1),*(first2+1))}}, ..., over {{tt|binary_op1}}. + +===See also=== +{{dsc begin}} +{{dsc inc | algorithm/dsc transform }} +{{dsc inc | algorithm/dsc reduce }} +{{dsc end}} diff --git a/simdpp/algorithm/transform_reduce.h b/simdpp/algorithm/transform_reduce.h new file mode 100644 index 00000000..96ac8ba9 --- /dev/null +++ b/simdpp/algorithm/transform_reduce.h @@ -0,0 +1,150 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_REDUCE_H +#define LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_REDUCE_H + +#ifndef LIBSIMDPP_SIMD_H +#error "This file must be included through simd.h" +#endif + +#include +#include +#include +#include +#include +#include + +namespace simdpp { + namespace SIMDPP_ARCH_NAMESPACE { + template + T transform_reduce( const T* first, const T* last, T init, BinaryOp binary_op,UnaryOp unary_op) + { +#ifndef NDEBUG //precondition debug mode + if (!first) + throw std::runtime_error("transform_reduce - null ptr first."); + if (!last) + throw std::runtime_error("transform_reduce - null ptr last."); +#endif + using simd_type_T = typename typetraits::simd_type; + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + init = binary_op(init,unary_op(*first++)); + } + + simd_type_T accusimd=splat((T)0); + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = unary_op(load(first)); + accusimd = binary_op(accusimd,element); + first += simd_size; + } + //reduce simd residual + for_each(accusimd, [&](T el) { init = binary_op(init, el); }); + + //---epilogue + for (; i < size; ++i) + { + init = binary_op(init,unary_op(*first++)); + } + return init; + + } + + template + U transform_reduce(const T1* first1, const T1* last1, const T2* first2, U init, BinaryOp1 binary_op1, BinaryOp2 binary_op2) + { +#ifndef NDEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("transform_reduce - null ptr first1."); + if (!last1) + throw 
std::runtime_error("transform_reduce - null ptr last1."); + if (!first2) + throw std::runtime_error("transform_reduce - null ptr first2."); +#endif + using simd_type_T1 = typename typetraits::simd_type; + using simd_type_T2 = typename typetraits::simd_type; + using simd_type_U = typename typetraits::simd_type; + + static_assert (simd_type_T1::base_length == simd_type_T2::base_length + , "mismatch base_length between T1 and T2" + ); + static_assert (simd_type_T1::base_length == simd_type_U::base_length + , "mismatch base_length between T1 and U" + ); + static_assert (simd_type_T2::base_length == simd_type_U::base_length + , "mismatch base_length between T2 and U" + ); + + auto alignment = typetraits::alignment; + + //Define loop counter + const auto simd_size = simd_type_T1::base_length; + const auto size = std::distance(first1, last1); + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + + auto i = 0u; + //---Prologue + for(;i #include #include +#include /** @def SIMDPP_NO_DISPATCHER diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 49bfe641..a000282f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -96,6 +96,7 @@ set(TEST_INSN_ARCH_SOURCES insn/test_utils.cc insn/tests.cc insn/transform.cc + insn/transform_reduce.cc insn/transpose.cc ) diff --git a/test/insn/tests.cc b/test/insn/tests.cc index a6ea0d1a..04879e8a 100644 --- a/test/insn/tests.cc +++ b/test/insn/tests.cc @@ -94,6 +94,7 @@ namespace SIMDPP_ARCH_NAMESPACE { test_replace_if(res,tr); test_reduce(res, tr); test_transform(res, tr); + test_transform_reduce(res, tr); } diff --git a/test/insn/tests.h b/test/insn/tests.h index 0d7544c9..69f8b04e 100644 --- a/test/insn/tests.h +++ b/test/insn/tests.h @@ -57,6 +57,7 @@ void main_test_function(TestResults& res, TestReporter& tr, const TestOptions& o void test_test_utils(TestResults& res); void test_transpose(TestResults& res); void 
test_transform(TestResults& res, TestReporter& tr); + void test_transform_reduce(TestResults& res, TestReporter& tr); } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/transform_reduce.cc b/test/insn/transform_reduce.cc new file mode 100644 index 00000000..0477ccce --- /dev/null +++ b/test/insn/transform_reduce.cc @@ -0,0 +1,106 @@ +/* Copyright (C) 2018 Povilas Kanapickas + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "../utils/test_helpers.h" +#include "../utils/test_results.h" +#include +#include + +namespace SIMDPP_ARCH_NAMESPACE { + using namespace simdpp; + + template + struct UnaryPredicateSquare + { + using simd_type_T = typename typetraits::simd_type; + T operator()(T a) {return a*a;} + simd_type_T operator()(const simd_type_T& a) {return a*a;} + }; + + template + struct BinaryPredicatePlus + { + using simd_type_T = typename typetraits::simd_type; + T operator()(T a0,T a1) {return a0 + a1;} + simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return a0+a1;} + }; + + template + struct BinaryPredicateMul + { + using simd_type_T = typename typetraits::simd_type; + T operator()(T a0,T a1) {return a0 * a1;} + simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return a0*a1;} + }; + + template + void test_transform_reduce_type_unary(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + {// + vector_aligned_t ivect={(T)0,T(1)}; + auto res=transform_reduce(ivect.data(),ivect.data()+ivect.size(),(T)0,BinaryPredicatePlus(),UnaryPredicateSquare()); + auto expected=std::inner_product( ivect.data(),ivect.data()+ivect.size(), ivect.data(), T(0)); + TEST_EQUAL(tr, expected,res); + } + {// + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)1); + auto 
res=transform_reduce(ivect.data(),ivect.data()+ivect.size(),(T)0,BinaryPredicatePlus(),UnaryPredicateSquare()); + auto expected=std::inner_product( ivect.data(),ivect.data()+ivect.size(), ivect.data(), T(0)); + TEST_EQUAL(tr, expected,res); + } + } + template + void test_transform_reduce_type_binary(TestResultsSet& ts, TestReporter& tr) + { + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + using vector_aligned_u = std::vector::alignment>>; + { + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)1); + vector_aligned_u ivect2(50); + std::iota(begin(ivect2),end(ivect2),(T)1); + auto res = transform_reduce( ivect.data(),ivect.data()+ivect.size(),ivect2.data(),T(0),BinaryPredicateMul(),BinaryPredicatePlus()); + auto expected = std::inner_product( ivect.data(), ivect.data()+ivect.size(), ivect2.data(), T(0)); + TEST_EQUAL(tr, expected,res); + } + } + + void test_transform_reduce(TestResults& res, TestReporter& tr) + { + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("transform_reduce"); + test_transform_reduce_type_unary(ts, tr); + test_transform_reduce_type_unary(ts, tr); + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + + test_transform_reduce_type_binary(ts, tr); + test_transform_reduce_type_binary(ts, tr); + // test_transform_reduce_type_binary(ts,tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // 
test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + + } + +} // namespace SIMDPP_ARCH_NAMESPACE From 179cc904c11133e9c3f358bf5cb59a3d61c2ecbf Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Sun, 11 Mar 2018 08:03:56 +0100 Subject: [PATCH 16/23] issue #107 ras --- doc/wiki/algorithm/equal.mwiki | 4 ++-- doc/wiki/algorithm/reduce.mwiki | 6 +++--- doc/wiki/algorithm/transform.mwiki | 4 ++-- simdpp/algorithm/transform.h | 6 +++--- test/insn/reduce.cc | 3 ++- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/doc/wiki/algorithm/equal.mwiki b/doc/wiki/algorithm/equal.mwiki index b5ae22e7..3cee2008 100644 --- a/doc/wiki/algorithm/equal.mwiki +++ b/doc/wiki/algorithm/equal.mwiki @@ -15,8 +15,8 @@ {{tt|equal}} Returns {{c|true}} if the range {{tt|[first1,last1)}} is equal to the range {{tt|[first2, first2 + (last1 - first1))}} according to pred and {{c|false}} otherwise -1) Returns {{c|true}} if the range {{tt|[first1,last1)}} is equal to the range {{tt|[first2, first2 + (last1 - first1))}}, and {{c|false}} otherwise -2) The binary operation BinaryPredicate is applied to pairs of elements from two ranges: one defined by [first1, last1) and the other beginning at first2. +@1@ Returns {{c|true}} if the range {{tt|[first1,last1)}} is equal to the range {{tt|[first2, first2 + (last1 - first1))}}, and {{c|false}} otherwise +@2@ The binary operation BinaryPredicate is applied to pairs of elements from two ranges: one defined by [first1, last1) and the other beginning at first2. ===Parameters=== {{par begin}} diff --git a/doc/wiki/algorithm/reduce.mwiki b/doc/wiki/algorithm/reduce.mwiki index a916d1e4..2e35a049 100644 --- a/doc/wiki/algorithm/reduce.mwiki +++ b/doc/wiki/algorithm/reduce.mwiki @@ -12,8 +12,8 @@ template {{dcl end}} {{misc/navbar}} -1) Computes the sum over elements in the given Range [first,last) and the initial value init. 
-2) Reduces the range [first,last), possibly permuted and aggregated in unspecified manner, along with the initial value init over binary_op. +@1@ Computes the sum over elements in the given Range [first,last) and the initial value init. +@2@ Reduces the range [first,last), possibly permuted and aggregated in unspecified manner, along with the initial value init over binary_op. ===Notes=== The behavior is non-deterministic if binary_op is not associative or not commutative. @@ -35,4 +35,4 @@ in other words, {{tt|reduce}} behaves like {{lc|accumulate}} except the elements ===See also=== {{dsc begin}} {{dsc inc | algorithm/dsc transform }} -{{dsc end}} \ No newline at end of file +{{dsc end}} diff --git a/doc/wiki/algorithm/transform.mwiki b/doc/wiki/algorithm/transform.mwiki index ba716212..c6a82431 100644 --- a/doc/wiki/algorithm/transform.mwiki +++ b/doc/wiki/algorithm/transform.mwiki @@ -14,8 +14,8 @@ template {{tt|transform}} applies the given function to a range and stores the result in another range, beginning at out. -1) The unary operation unary_op is applied to the range defined by [first1, last1). -2) The binary operation binary_op is applied to pairs of elements from two ranges: one defined by [first1, last1) and the other beginning at first2. +@1@ The unary operation unary_op is applied to the range defined by [first1, last1). +@2@ The binary operation binary_op is applied to pairs of elements from two ranges: one defined by [first1, last1) and the other beginning at first2. 
===Parameters=== {{par begin}} diff --git a/simdpp/algorithm/transform.h b/simdpp/algorithm/transform.h index b607f47a..0b51ef30 100644 --- a/simdpp/algorithm/transform.h +++ b/simdpp/algorithm/transform.h @@ -61,7 +61,7 @@ namespace simdpp { *out++ = f(*first++); } //---main simd loop - if (detail::is_aligned(out, alignment)) //TODO reach the first aligned adress + if (detail::is_aligned(out, alignment)) { for (; i < size_simd_loop; i += simd_size) { @@ -134,7 +134,7 @@ namespace simdpp { } //---main simd loop - if (detail::is_aligned(first1, alignment) && detail::is_aligned(first2, alignment) && detail::is_aligned(out, alignment))//TODO reach the first aligned adress + if (detail::is_aligned(first2, alignment) && detail::is_aligned(out, alignment)) { for (; i < size_simd_loop; i += simd_size) { @@ -150,7 +150,7 @@ namespace simdpp { { for (; i < size_simd_loop; i += simd_size) { - simd_type_T1 element1 = load_u(first1); + simd_type_T1 element1 = load(first1); simd_type_T2 element2 = load_u(first2); store_u(out, f(element1, element2)); first1 += simd_size; diff --git a/test/insn/reduce.cc b/test/insn/reduce.cc index 7c108cc7..8778bc17 100644 --- a/test/insn/reduce.cc +++ b/test/insn/reduce.cc @@ -119,7 +119,8 @@ namespace SIMDPP_ARCH_NAMESPACE { test_reducebinop_type(ts, tr); test_reducebinop_type(ts, tr); - //TR Why no operator * for above types at least for uint64_t and int64_t + //TR Why no operator * for above types at least for uint64_t + //and int64_t //FIXME //test_reducebinop_type(ts, tr); //test_reducebinop_type(ts, tr); //test_reducebinop_type(ts, tr); From f57deb03a6a76722a98b7c0d2f5258421526a806 Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Sun, 11 Mar 2018 11:01:38 +0100 Subject: [PATCH 17/23] issue #107 visual compilation fix --- test/insn/lexicographical_compare.cc | 1 + test/insn/replace.cc | 1 + test/insn/replace_if.cc | 1 + test/insn/transform_reduce.cc | 1 + 4 files changed, 4 insertions(+) diff --git 
a/test/insn/lexicographical_compare.cc b/test/insn/lexicographical_compare.cc index ecd50257..12d10759 100644 --- a/test/insn/lexicographical_compare.cc +++ b/test/insn/lexicographical_compare.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include namespace SIMDPP_ARCH_NAMESPACE { diff --git a/test/insn/replace.cc b/test/insn/replace.cc index 880a5d98..7c3a2ca7 100644 --- a/test/insn/replace.cc +++ b/test/insn/replace.cc @@ -9,6 +9,7 @@ #include "../utils/test_results.h" #include #include +#include #include #include diff --git a/test/insn/replace_if.cc b/test/insn/replace_if.cc index 96578160..35c7c04f 100644 --- a/test/insn/replace_if.cc +++ b/test/insn/replace_if.cc @@ -9,6 +9,7 @@ #include "../utils/test_results.h" #include #include +#include #include #include diff --git a/test/insn/transform_reduce.cc b/test/insn/transform_reduce.cc index 0477ccce..0b52dc70 100644 --- a/test/insn/transform_reduce.cc +++ b/test/insn/transform_reduce.cc @@ -8,6 +8,7 @@ #include "../utils/test_helpers.h" #include "../utils/test_results.h" #include +#include #include namespace SIMDPP_ARCH_NAMESPACE { From 3d9fb98d2c5b3d9076601d1564e0da42e0cc4cc4 Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Mon, 9 Apr 2018 17:51:10 +0200 Subject: [PATCH 18/23] issue #107 Follow review * fix indent * add "fuzzing" tests for all algorithm * add TEST_EQUAL_COLLECTIONS * add nrt helpers for generating data (to be moved elsewhere ?) 
--- simdpp/algorithm/all_of.h | 85 +++--- simdpp/algorithm/any_of.h | 80 ++--- simdpp/algorithm/copy.h | 47 +-- simdpp/algorithm/copy_n.h | 15 +- simdpp/algorithm/count.h | 83 +++--- simdpp/algorithm/count_if.h | 83 +++--- simdpp/algorithm/equal.h | 140 ++++----- simdpp/algorithm/fill.h | 77 ++--- simdpp/algorithm/find.h | 111 +++---- simdpp/algorithm/find_if.h | 85 +++--- simdpp/algorithm/find_if_not.h | 87 +++--- simdpp/algorithm/helper_input_range.h | 59 ++-- simdpp/algorithm/lexicographical_compare.h | 177 +++++------ simdpp/algorithm/max.h | 223 +++++++------- simdpp/algorithm/max_element.h | 37 +-- simdpp/algorithm/min.h | 225 +++++++------- simdpp/algorithm/min_element.h | 37 +-- simdpp/algorithm/none_of.h | 27 +- simdpp/algorithm/reduce.h | 188 ++++++------ simdpp/algorithm/replace.h | 33 ++- simdpp/algorithm/replace_if.h | 43 +-- simdpp/algorithm/transform.h | 269 ++++++++--------- simdpp/algorithm/transform_reduce.h | 241 +++++++-------- simdpp/detail/align.h | 19 +- simdpp/dispatch/get_arch_string_list.h | 2 +- simdpp/simd.h | 25 -- simdpp/types/traits.h | 322 ++++++++++---------- test/insn/all_of.cc | 171 +++++++---- test/insn/any_of.cc | 150 ++++++---- test/insn/copy.cc | 138 +++++---- test/insn/copy_n.cc | 101 +++---- test/insn/count.cc | 106 +++++-- test/insn/count_if.cc | 123 +++++--- test/insn/equal.cc | 107 ++++--- test/insn/fill.cc | 117 +++++--- test/insn/find.cc | 109 ++++--- test/insn/find_if.cc | 139 +++++---- test/insn/find_if_not.cc | 141 +++++---- test/insn/lexicographical_compare.cc | 123 +++++--- test/insn/max.cc | 197 ++++++++----- test/insn/max_element.cc | 175 ++++++----- test/insn/min.cc | 200 ++++++++----- test/insn/min_element.cc | 179 +++++++----- test/insn/none_of.cc | 151 ++++++---- test/insn/reduce.cc | 268 ++++++++++------- test/insn/replace.cc | 114 +++++--- test/insn/replace_if.cc | 144 +++++---- test/insn/transform.cc | 323 ++++++++++++--------- test/insn/transform_reduce.cc | 241 +++++++++------ test/utils/test_helpers.h | 
131 ++++++++- 50 files changed, 3735 insertions(+), 2733 deletions(-) diff --git a/simdpp/algorithm/all_of.h b/simdpp/algorithm/all_of.h index 5f53fe05..4bcb9d42 100644 --- a/simdpp/algorithm/all_of.h +++ b/simdpp/algorithm/all_of.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -17,53 +18,59 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - bool all_of(T const* first, T const* last, UnaryPredicate pred) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("all_of - null ptr first."); - if (!last) - throw std::runtime_error("all_of - null ptr last."); +template +bool all_of(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("all_of - null ptr first."); + if (!last) + throw std::runtime_error("all_of - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; - //define loopcounter - const auto simd_size = simd_type_T::base_length; + //define loopcounter + const auto simd_size = simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - //prologue - auto lastprologue = first + size_prologue_loop; - if(!std::all_of(first, lastprologue, 
pred)) return false; + //prologue + auto lastprologue = first + size_prologue_loop; + if (!std::all_of(first, lastprologue, pred)) + { + return false; + } - //simd loop - auto i = size_prologue_loop; - //workaraund not reduce_add for mask type - const simd_type_T on = splat(T(1)); - const simd_type_T off = splat(T(0)); - for (; i < size_simd_loop; i += simd_size) - { - simd_mask_T mask = pred(load(lastprologue)); - const auto res = blend(on, off, mask); + //simd loop + auto i = size_prologue_loop; + //workaraund not reduce_and for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off, mask); - if (!reduce_and(res)) - { - return false; - } - lastprologue += simd_size; - } - if(!std::all_of(lastprologue,last, pred)) return false; - return true; - } - } // namespace SIMDPP_ARCH_NAMESPACE + if (!reduce_and(res)) + { + return false; + } + lastprologue += simd_size; + } + if (!std::all_of(lastprologue,last, pred)) + { + return false; + } + return true; +} +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_ALL_OF_H diff --git a/simdpp/algorithm/any_of.h b/simdpp/algorithm/any_of.h index 30ee5194..4874beea 100644 --- a/simdpp/algorithm/any_of.h +++ b/simdpp/algorithm/any_of.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -17,52 +18,55 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - bool any_of(T const* first, T const* last, UnaryPredicate pred) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("any_of - null ptr first."); - if (!last) - throw std::runtime_error("any_of - null ptr last."); +template +bool any_of(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("any_of - null ptr first."); + if (!last) + throw std::runtime_error("any_of - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; - //define loopcounter - const auto simd_size = simd_type_T::base_length; + //define loopcounter + const auto simd_size = simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - //prologue - auto lastprologue = first + size_prologue_loop; - if(std::any_of(first, lastprologue, pred)) return true; + //prologue + auto lastprologue = first + size_prologue_loop; + if (std::any_of(first, lastprologue, pred)) + { + return true; + } - //simd loop - auto i = size_prologue_loop; - //workaraund not test_bits_any for mask type - const simd_type_T on = splat(T(1)); //TODO factorize - const simd_type_T off = splat(T(0)); - for (; i < size_simd_loop; i += simd_size) - { 
- simd_mask_T mask = pred(load(lastprologue)); //TODO factorize - const auto res = blend(on, off, mask); - if (test_bits_any(res)) - { - return true; - } - lastprologue += simd_size; - } - return std::any_of(lastprologue,last, pred); - } + //simd loop + auto i = size_prologue_loop; + //workaraund not test_bits_any for mask type + const simd_type_T on = splat(T(1)); //TODO factorize + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + simd_mask_T mask = pred(load(lastprologue)); //TODO factorize + const auto res = blend(on, off, mask); + if (test_bits_any(res)) + { + return true; + } + lastprologue += simd_size; + } + return std::any_of(lastprologue,last, pred); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_ANY_OF_H diff --git a/simdpp/algorithm/copy.h b/simdpp/algorithm/copy.h index dc505866..a826c748 100644 --- a/simdpp/algorithm/copy.h +++ b/simdpp/algorithm/copy.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -15,29 +16,29 @@ Distributed under the Boost Software License, Version 1.0. 
#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - template - T* copy(T const* first, T const* last, T* out) - { - struct UnaryOpCopy - { - using simd_type_T = typename typetraits::simd_type; - SIMDPP_INL T operator()(T const &a) const noexcept - { - return a; - } - - SIMDPP_INL simd_type_T operator()(simd_type_T const &a) const noexcept - { - return a; - } - }; - - return transform(first, last, out, UnaryOpCopy{}); - } - - } // namespace SIMDPP_ARCH_NAMESPACE +namespace SIMDPP_ARCH_NAMESPACE { + +template +T* copy(T const* first, T const* last, T* out) +{ +struct UnaryOpCopy +{ + using simd_type_T = typename simd_traits::simd_type; + SIMDPP_INL T operator()(T const &a) const noexcept + { + return a; + } + + SIMDPP_INL simd_type_T operator()(simd_type_T const &a) const noexcept + { + return a; + } +}; + +return transform(first, last, out, UnaryOpCopy{}); +} + +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_COPY_H diff --git a/simdpp/algorithm/copy_n.h b/simdpp/algorithm/copy_n.h index 4999b489..c6165d10 100644 --- a/simdpp/algorithm/copy_n.h +++ b/simdpp/algorithm/copy_n.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -15,15 +16,15 @@ Distributed under the Boost Software License, Version 1.0. 
#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template T* copy_n(T const* first, Size n, T* out) - { - if (n <= Size(0)) return out; - return copy(first, first + n, out); - } +template T* copy_n(T const* first, Size n, T* out) +{ + if (n <= Size(0)) return out; + return copy(first, first + n, out); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_COPY_N_H diff --git a/simdpp/algorithm/count.h b/simdpp/algorithm/count.h index ea78f002..489273c4 100644 --- a/simdpp/algorithm/count.h +++ b/simdpp/algorithm/count.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -15,56 +16,56 @@ Distributed under the Boost Software License, Version 1.0. #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - typename std::iterator_traits::difference_type - count(T const* first, T const* last, U val) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("count - null ptr first."); - if (!last) - throw std::runtime_error("count - null ptr last."); +template +typename std::iterator_traits::difference_type +count(T const* first, T const* last, U val) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("count - null ptr first."); + if (!last) + throw std::runtime_error("count - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; - using return_type = typename std::iterator_traits::difference_type; - if (first == last) return (return_type)0; - //define loopcounter - const auto simd_size = simd_type_T::base_length; + using simd_type_T = typename simd_traits::simd_type; + using 
simd_mask_T = typename simd_traits::simd_mask_type; + using return_type = typename std::iterator_traits::difference_type; + if (first == last) return (return_type)0; + //define loopcounter + const auto simd_size = simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - //prologue - auto lastprologue = first + size_prologue_loop; - return_type res = std::count(first, lastprologue, val); + //prologue + auto lastprologue = first + size_prologue_loop; + return_type res = std::count(first, lastprologue, val); - //simd loop - auto i = size_prologue_loop; + //simd loop + auto i = size_prologue_loop; - //workaraund not reduce_add for mask type - const simd_type_T on = splat(T(1)); - const simd_type_T off = splat(T(0)); - const simd_type_T valsimd = splat(U(val)); + //workaraund not reduce_add for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + const simd_type_T valsimd = splat(U(val)); - for (; i < size_simd_loop; i += simd_size) - { - const simd_type_T el = load(lastprologue); - const simd_mask_T mask = cmp_eq(el, valsimd); - const auto rescurrentsimd = blend(on, off, mask); - res += (return_type)reduce_add(rescurrentsimd); - lastprologue += simd_size; - } - res += std::count(lastprologue, last, val); - return res; - } + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(lastprologue); + const simd_mask_T mask = cmp_eq(el, valsimd); + const auto rescurrentsimd = blend(on, off, mask); + res += (return_type)reduce_add(rescurrentsimd); + lastprologue += simd_size; + } + res += std::count(lastprologue, last, 
val); + return res; +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_H diff --git a/simdpp/algorithm/count_if.h b/simdpp/algorithm/count_if.h index d19aff26..ded8d649 100644 --- a/simdpp/algorithm/count_if.h +++ b/simdpp/algorithm/count_if.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -12,55 +13,57 @@ Distributed under the Boost Software License, Version 1.0. #error "This file must be included through simd.h" #endif +#include + namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - typename std::iterator_traits::difference_type - count_if(T const* first, T const* last, UnaryPredicate pred) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("count_if - null ptr first."); - if (!last) - throw std::runtime_error("count_if - null ptr last."); +template +typename std::iterator_traits::difference_type +count_if(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("count_if - null ptr first."); + if (!last) + throw std::runtime_error("count_if - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; - using return_type = typename std::iterator_traits::difference_type; - if (first == last) return (return_type)0; - //define loopcounter - const auto simd_size = simd_type_T::base_length; + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + using return_type = typename std::iterator_traits::difference_type; + if (first == last) return (return_type)0; + //define loopcounter + const auto simd_size = 
simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - //prologue - auto lastprologue = first + size_prologue_loop; - return_type res = std::count_if(first, lastprologue, pred); + //prologue + auto lastprologue = first + size_prologue_loop; + return_type res = std::count_if(first, lastprologue, pred); - //simd loop - auto i = size_prologue_loop; + //simd loop + auto i = size_prologue_loop; - //workaraund not reduce_add for mask type - const simd_type_T on = splat(T(1)); - const simd_type_T off = splat(T(0)); - for (; i < size_simd_loop; i += simd_size) - { - const simd_type_T el = load(lastprologue); - const simd_mask_T mask = pred(el); - const auto rescurrentsimd = blend(on, off, mask); - res += (return_type)reduce_add(rescurrentsimd); - lastprologue += simd_size; - } - res += std::count_if(lastprologue, last, pred); - return res; - } + //workaraund not reduce_add for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(lastprologue); + const simd_mask_T mask = pred(el); + const auto rescurrentsimd = blend(on, off, mask); + res += (return_type)reduce_add(rescurrentsimd); + lastprologue += simd_size; + } + res += std::count_if(lastprologue, last, pred); + return res; +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_COUNT_IF_H diff --git a/simdpp/algorithm/equal.h b/simdpp/algorithm/equal.h index 52e1e5b1..ea1209ff 100644 --- a/simdpp/algorithm/equal.h +++ 
b/simdpp/algorithm/equal.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -13,87 +14,88 @@ #endif #include +#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - bool equal(const T* first1, const T* last1, const T* first2,BinaryPredicate pred) - { +template +bool equal(const T* first1, const T* last1, const T* first2,BinaryPredicate pred) +{ -#ifndef NDEBUG //precondition debug mode - if (!first1) - throw std::runtime_error("equal - null ptr first1."); - if (!last1) - throw std::runtime_error("equal - null ptr last1."); - if (!first2) - throw std::runtime_error("equal - null ptr first2."); +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("equal - null ptr first1."); + if (!last1) + throw std::runtime_error("equal - null ptr last1."); + if (!first2) + throw std::runtime_error("equal - null ptr first2."); #endif - using simd_type_T = typename typetraits::simd_type; - //using simd_mask_T = typename typetraits::simd_mask_type; - auto alignment = typetraits::alignment; + using simd_type_T = typename simd_traits::simd_type; + //using simd_mask_T = typename simd_traits::simd_mask_type; + auto alignment = simd_traits::alignment; - //define loopcounter - const auto simd_size = simd_type_T::base_length; + //define loopcounter + const auto simd_size = simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first1, last1); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - //prologue - 
auto last1prologue = first1 + size_prologue_loop; - auto last2prologue = first2 + size_prologue_loop; - if(!std::equal(first1, last1prologue,first2,pred)) return false; + //prologue + auto last1prologue = first1 + size_prologue_loop; + auto last2prologue = first2 + size_prologue_loop; + if(!std::equal(first1, last1prologue,first2,pred)) return false; - auto i=size_prologue_loop; - //workaound no reduce_and for mask_type - const simd_type_T on=splat(T(1)); - const simd_type_T off=splat(T(0)); - //---main simd loop - if (detail::is_aligned(last2prologue, alignment)) - { - for (; i < size_simd_loop; i += simd_size) - { - const simd_type_T element1 = load(last1prologue); - const simd_type_T element2 = load(last2prologue); - const simd_type_T res=blend(on,off,pred(element1,element2)); //workaound no reduce_and for mask_type - if(!reduce_and(res)) return false; - last1prologue += simd_size; - last2prologue += simd_size; - } - } - else - { - for (; i < size_simd_loop; i += simd_size) - { - const simd_type_T element1 = load(last1prologue); - const simd_type_T element2 = load_u(last2prologue); - const simd_type_T res=blend(on,off,pred(element1,element2));//workaound no reduce_and for mask_type - if(!reduce_and(res)) return false; - last1prologue += simd_size; - last2prologue += simd_size; - } - } + auto i=size_prologue_loop; + //workaound no reduce_and for mask_type + const simd_type_T on=splat(T(1)); + const simd_type_T off=splat(T(0)); + //---main simd loop + if (detail::is_aligned(last2prologue, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element1 = load(last1prologue); + const simd_type_T element2 = load(last2prologue); + const simd_type_T res=blend(on,off,pred(element1,element2)); //workaound no reduce_and for mask_type + if(!reduce_and(res)) return false; + last1prologue += simd_size; + last2prologue += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element1 = 
load(last1prologue); + const simd_type_T element2 = load_u(last2prologue); + const simd_type_T res=blend(on,off,pred(element1,element2));//workaound no reduce_and for mask_type + if(!reduce_and(res)) return false; + last1prologue += simd_size; + last2prologue += simd_size; + } + } - if(!std::equal(last1prologue, last1,last2prologue,pred)) return false; - return true; - } + if(!std::equal(last1prologue, last1,last2prologue,pred)) return false; + return true; +} - template - bool equal(const T* first1, const T* last1, const T* first2) - { - struct local_bynary_predicate_equal - { - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; - bool operator()(const T& a0,const T& a1) {return a0==a1;} - simd_mask_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return cmp_eq(a0,a1);} - }; - return equal(first1,last1,first2,local_bynary_predicate_equal()); - } +template +bool equal(const T* first1, const T* last1, const T* first2) +{ + struct local_bynary_predicate_equal + { + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + bool operator()(const T& a0,const T& a1) {return a0==a1;} + simd_mask_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return cmp_eq(a0,a1);} + }; + return equal(first1,last1,first2,local_bynary_predicate_equal()); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_EQUAL_H diff --git a/simdpp/algorithm/fill.h b/simdpp/algorithm/fill.h index 96b8ba04..b6b507b4 100644 --- a/simdpp/algorithm/fill.h +++ b/simdpp/algorithm/fill.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -22,55 +23,55 @@ Distributed under the Boost Software License, Version 1.0. 
#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - void fill(T* first, T* last, U value) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("fill - null ptr first."); - if (!last) - throw std::runtime_error("fill - null ptr last."); +template +void fill(T* first, T* last, U value) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("fill - null ptr first."); + if (!last) + throw std::runtime_error("fill - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - const auto alignment = typetraits::alignment; + using simd_type_T = typename simd_traits::simd_type; + const auto alignment = simd_traits::alignment; - simd_type_T valsimd = splat((T)value); + simd_type_T valsimd = splat((T)value); - //Define loop counter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - auto i = 0u; + auto i = 0u; - //---prologue - for (; i < size_prologue_loop; ++i) - { - *first++=(T)value; - } + //---prologue + for (; i < size_prologue_loop; ++i) + { + *first++=(T)value; + } - //---main simd loop - for (; i < size_simd_loop; i += simd_size) - { - store(first, valsimd); - first += simd_size; - } + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + store(first, valsimd); + first += simd_size; + } - //---epilogue - 
for (; i < size; ++i) - { - *first++ = (T)value; - } + //---epilogue + for (; i < size; ++i) + { + *first++ = (T)value; + } - } +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_FILL_H diff --git a/simdpp/algorithm/find.h b/simdpp/algorithm/find.h index c9fa597e..663e8ef2 100644 --- a/simdpp/algorithm/find.h +++ b/simdpp/algorithm/find.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -18,71 +19,71 @@ Distributed under the Boost Software License, Version 1.0. #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - T const* find(T const* first, T const* last, U val) - { - struct UnaryPredicateEqualValue - { - public: - UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(splat(val)) {} - using simd_mask_T = typename typetraits::simd_mask_type; - using simd_type_T = typename typetraits::simd_type; +template +T const* find(T const* first, T const* last, U val) +{ + struct UnaryPredicateEqualValue + { + public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(splat(val)) {} + using simd_mask_T = typename simd_traits::simd_mask_type; + using simd_type_T = typename simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } - private: - T m_val; - simd_type_T m_val_simd; - }; + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + private: + T m_val; + simd_type_T m_val_simd; + }; -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("find - null ptr first."); - if (!last) - throw std::runtime_error("find - null ptr last."); +#ifndef SIMDPP_DEBUG 
//precondition debug mode + if (!first) + throw std::runtime_error("find - null ptr first."); + if (!last) + throw std::runtime_error("find - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T =typename typetraits::simd_mask_type; - - if (first == last) return last; + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T =typename simd_traits::simd_mask_type; - //define loopcounter - const auto simd_size = simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + if (first == last) return last; - //prologue - auto lastprologue = first + size_prologue_loop; - const auto resprologue = std::find(first, lastprologue, val); - if (resprologue != lastprologue) return resprologue; + //define loopcounter + const auto simd_size = simd_type_T::base_length; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - //simd loop - auto i = size_prologue_loop; + //prologue + auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find(first, lastprologue, val); + if (resprologue != lastprologue) return resprologue; - //workaraund not test_bits_any for mask type - const simd_type_T on = splat(T(1)); - const simd_type_T off = splat(T(0)); - const auto pred = UnaryPredicateEqualValue(val); - for (; i < size_simd_loop; i += simd_size) - { - //TR why can't test_bits_any not available for mask? - const simd_mask_T mask=pred(load(lastprologue)); - const auto res = blend(on, off,mask); - if (test_bits_any(res)) //match extract exact position - { - return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? 
- } - lastprologue += simd_size; - } + //simd loop + auto i = size_prologue_loop; - //epilogue - return std::find(lastprologue, last, val); + //workaraund not test_bits_any for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + const auto pred = UnaryPredicateEqualValue((T)val); + for (; i < size_simd_loop; i += simd_size) + { + //TR why can't test_bits_any not available for mask? + const simd_mask_T mask=pred(load(lastprologue)); + const auto res = blend(on, off,mask); + if (test_bits_any(res)) //match extract exact position + { + return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? } + lastprologue += simd_size; + } + + //epilogue + return std::find(lastprologue, last, val); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_H diff --git a/simdpp/algorithm/find_if.h b/simdpp/algorithm/find_if.h index e1bd5874..f8e998f5 100644 --- a/simdpp/algorithm/find_if.h +++ b/simdpp/algorithm/find_if.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -18,56 +19,56 @@ Distributed under the Boost Software License, Version 1.0. 
#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - T const* find_if(T const* first, T const* last, UnaryPredicate pred) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("find_if - null ptr first."); - if (!last) - throw std::runtime_error("find_if - null ptr last."); +template +T const* find_if(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("find_if - null ptr first."); + if (!last) + throw std::runtime_error("find_if - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; - if (first == last) return last; - - //define loopcounter - const auto simd_size = simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + if (first == last) return last; - //prologue - auto lastprologue = first + size_prologue_loop; - const auto resprologue = std::find_if(first, lastprologue, pred); - if (resprologue != lastprologue) return resprologue; + //define loopcounter + const auto simd_size = simd_type_T::base_length; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - //simd loop - auto i = size_prologue_loop; + //prologue + auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find_if(first, lastprologue, pred); + if (resprologue != lastprologue) return resprologue; - //workaraund not test_bits_any for mask type - const 
simd_type_T on = splat(T(1)); - const simd_type_T off = splat(T(0)); - for (; i < size_simd_loop; i += simd_size) - { - //TR why can't test_bits_any not available for mask? - const simd_mask_T mask = pred(load(lastprologue)); - const auto res = blend(on, off,mask); - if (test_bits_any(res)) //match extract exact position - { - return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? - } - lastprologue += simd_size; - } + //simd loop + auto i = size_prologue_loop; - //epilogue - return std::find_if(lastprologue, last, pred); + //workaraund not test_bits_any for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + //TR why can't test_bits_any not available for mask? + const simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off,mask); + if (test_bits_any(res)) //match extract exact position + { + return std::find_if(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? } + lastprologue += simd_size; + } + + //epilogue + return std::find_if(lastprologue, last, pred); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_H diff --git a/simdpp/algorithm/find_if_not.h b/simdpp/algorithm/find_if_not.h index 4b854c7b..b237e51a 100644 --- a/simdpp/algorithm/find_if_not.h +++ b/simdpp/algorithm/find_if_not.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -18,57 +19,57 @@ Distributed under the Boost Software License, Version 1.0. 
#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - T const* find_if_not(T const* first, T const* last, UnaryPredicate pred) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("find_if - null ptr first."); - if (!last) - throw std::runtime_error("find_if - null ptr last."); +template +T const* find_if_not(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("find_if - null ptr first."); + if (!last) + throw std::runtime_error("find_if - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; - - if (first == last) return last; + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; - //define loopcounter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + if (first == last) return last; - //prologue - auto lastprologue = first + size_prologue_loop; - const auto resprologue = std::find_if_not(first, lastprologue, pred); - if (resprologue != lastprologue) return resprologue; + //define loopcounter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - //simd loop - auto i = size_prologue_loop; + //prologue + auto lastprologue = first + size_prologue_loop; + const auto resprologue = std::find_if_not(first, lastprologue, pred); 
+ if (resprologue != lastprologue) return resprologue; - //workaraund not reduce_and for mask type - const simd_type_T on = splat(T(1)); - const simd_type_T off = splat(T(0)); - for (; i < size_simd_loop; i += simd_size) - { - //TR why can't reduce_and not available for mask? - simd_mask_T mask = pred(load(lastprologue)); - const auto res = blend(on, off, mask); - if (!reduce_and(res)) //match extract exact position - { - return std::find_if_not(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? - } - lastprologue += simd_size; - } + //simd loop + auto i = size_prologue_loop; - //epilogue - return std::find_if_not(lastprologue, last, pred); + //workaraund not reduce_and for mask type + const simd_type_T on = splat(T(1)); + const simd_type_T off = splat(T(0)); + for (; i < size_simd_loop; i += simd_size) + { + //TR why can't reduce_and not available for mask? + simd_mask_T mask = pred(load(lastprologue)); + const auto res = blend(on, off, mask); + if (!reduce_and(res)) //match extract exact position + { + return std::find_if_not(lastprologue, lastprologue + simd_size, pred); //or extract position from res ? } + lastprologue += simd_size; + } + + //epilogue + return std::find_if_not(lastprologue, last, pred); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_FIND_IF_NOT_H diff --git a/simdpp/algorithm/helper_input_range.h b/simdpp/algorithm/helper_input_range.h index e12217a5..c4fd7670 100644 --- a/simdpp/algorithm/helper_input_range.h +++ b/simdpp/algorithm/helper_input_range.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -17,40 +18,40 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - /** - Extract from contigous range [first,last[ - The two loop counter - -the scalar prologue [start,size_prologue_loop[ i.e the range defined between the original begin and the first - location to be properly aligned to be used through simd operators - - the main simd_loop_part,[size_prologue_loop,size_simd_loop[ i.e the range where we could apply simd operators - - Note epilogue equals [size_simd_loop,stop[ - */ - template - const std::pair helper_input_range(const T* first, const T* last) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("helper_input_range - null ptr first."); - if (!last) - throw std::runtime_error("helper_input_range - null ptr last."); +namespace SIMDPP_ARCH_NAMESPACE { +/** +Extract from contigous range [first,last[ + The two loop counter + -the scalar prologue [start,size_prologue_loop[ i.e the range defined between the original begin and the first + location to be properly aligned to be used through simd operators + - the main simd_loop_part,[size_prologue_loop,size_simd_loop[ i.e the range where we could apply simd operators + - Note epilogue equals [size_simd_loop,stop[ +*/ +template +const std::pair helper_input_range(const T* first, const T* last) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("helper_input_range - null ptr first."); + if (!last) + throw std::runtime_error("helper_input_range - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; + using simd_type_T = typename simd_traits::simd_type; - const auto simd_size = simd_type_T::base_length; - const auto alignment = typetraits::alignment; + const auto simd_size = simd_type_T::base_length; + const auto alignment = simd_traits::alignment; - const auto size = std::distance(first, last); - //get first aligned adress from first - const T* 
ptr_aligned_first =(T*)detail::reach_next_aligned((void*)first, alignment); - // Next aligned address may be out of range, so make sure size_prologue_loop is not bigger than size - const auto size_prologue_loop = std::min(size,std::distance(first, ptr_aligned_first)); - const auto size_simd_loop = (size >= size_prologue_loop) ? (simd_size * ((size- size_prologue_loop) / simd_size)) : (0u); + const auto size = last - first; + //get first aligned adress from first + const T* ptr_aligned_first =detail::reach_next_aligned(first, alignment); + // Next aligned address may be out of range, so make sure size_prologue_loop is not bigger than size + const auto size_prologue_loop = std::min(size,std::distance(first, ptr_aligned_first)); + const auto size_simd_loop = (size >= size_prologue_loop) ? (simd_size * ((size- size_prologue_loop) / simd_size)) : (0u); - return std::make_pair(size_prologue_loop, size_simd_loop); - } + return std::make_pair(size_prologue_loop, size_simd_loop); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_HELPER_INPUT_RANGE_H diff --git a/simdpp/algorithm/lexicographical_compare.h b/simdpp/algorithm/lexicographical_compare.h index 4ac7aa02..35dd05d1 100644 --- a/simdpp/algorithm/lexicographical_compare.h +++ b/simdpp/algorithm/lexicographical_compare.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -22,109 +23,109 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2,BinarayPredicate comp) - { -#ifndef NDEBUG //precondition debug mode - if (!first1) - throw std::runtime_error("lexicographical_compare - null ptr first1."); - if (!last1) - throw std::runtime_error("lexicographical_compare - null ptr last1."); - if (!first2) - throw std::runtime_error("lexicographical_compare - null ptr first2."); - if (!last2) - throw std::runtime_error("lexicographical_compare - null ptr last2."); +template +bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2,BinarayPredicate comp) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("lexicographical_compare - null ptr first1."); + if (!last1) + throw std::runtime_error("lexicographical_compare - null ptr last1."); + if (!first2) + throw std::runtime_error("lexicographical_compare - null ptr first2."); + if (!last2) + throw std::runtime_error("lexicographical_compare - null ptr last2."); #endif - using simd_type_T = typename typetraits::simd_type; + using simd_type_T = typename simd_traits::simd_type; - auto alignment = typetraits::alignment; - using difference_type_T = typename std::iterator_traits::difference_type; - difference_type_T d1 = std::distance(first1, last1); - difference_type_T d2 = std::distance(first2, last2); - bool shorter = d1 < d2; - auto last = shorter ? last1 : first1+d2; - auto size = shorter ? d1 : d2; + auto alignment = simd_traits::alignment; + using difference_type_T = typename std::iterator_traits::difference_type; + difference_type_T d1 = std::distance(first1, last1); + difference_type_T d2 = std::distance(first2, last2); + bool shorter = d1 < d2; + auto last = shorter ? last1 : first1+d2; + auto size = shorter ? 
d1 : d2; - //define loopcounter - const auto simd_size = simd_type_T::base_length; + //define loopcounter + const auto simd_size = simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first1, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first1, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - auto i=0u; - //---prologue - for (; i < size_prologue_loop; ++i) - { - if (comp(*first1, *first2)) return true; - if (comp(*first2, *first1)) return false; - ++first1; - ++first2; - } + auto i=0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + if (comp(*first1, *first2)) return true; + if (comp(*first2, *first1)) return false; + ++first1; + ++first2; + } - //---main simd loop - if (detail::is_aligned(first2, alignment)) - { - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T element1 = load(first1); - simd_type_T element2 = load(first2); - if(comp(element1, element2)) return true; - if(comp(element2, element1)) return false; - first1 += simd_size; - first2 += simd_size; - } - } - else - { - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T element1 = load(first1); - simd_type_T element2 = load_u(first2); - if(comp(element1, element2)) return true; - if(comp(element2, element1)) return false; - first1 += simd_size; - first2 += simd_size; - } - } - //---epilogue - for (; i < size; ++i) - { - if (comp(*first1, *first2)) return true; - if (comp(*first2, *first1)) return false; - ++first1; - ++first2; - } - return shorter; - } + //---main simd loop + if (detail::is_aligned(first2, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element1 = load(first1); + simd_type_T element2 = load(first2); + if(comp(element1, element2)) 
return true; + if(comp(element2, element1)) return false; + first1 += simd_size; + first2 += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element1 = load(first1); + simd_type_T element2 = load_u(first2); + if(comp(element1, element2)) return true; + if(comp(element2, element1)) return false; + first1 += simd_size; + first2 += simd_size; + } + } + //---epilogue + for (; i < size; ++i) + { + if (comp(*first1, *first2)) return true; + if (comp(*first2, *first1)) return false; + ++first1; + ++first2; + } + return shorter; +} - template - bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2) - { - struct local_binary_predicate_less - { - using simd_type_T = typename typetraits::simd_type; +template +bool lexicographical_compare(const T* first1, const T* last1, const T* first2, const T* last2) +{ + struct local_binary_predicate_less + { + using simd_type_T = typename simd_traits::simd_type; - local_binary_predicate_less():on(splat(T(1))),off(splat(T(0))) {} + local_binary_predicate_less():on(splat(T(1))),off(splat(T(0))) {} - bool operator()(T a0,T a1) { return a0 + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -20,120 +21,120 @@ Distributed under the Boost Software License, Version 1.0. #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - //Returns the value of the element with the largest value in the range[first, last[ over comp, - //The lowest possible value for the order if the range is empty. 
- template - T max(T const* first, T const* last, Comp comp) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("max - null ptr first."); - if (!last) - throw std::runtime_error("max - null ptr last."); +namespace SIMDPP_ARCH_NAMESPACE { + +//Returns the value of the element with the largest value in the range[first, last[ over comp, +//The lowest possible value for the order if the range is empty. +template +T max(T const* first, T const* last, Comp comp) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("max - null ptr first."); + if (!last) + throw std::runtime_error("max - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; - - if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::max() : std::numeric_limits::lowest(); //stolen from boost::simd - - //Define loop counter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - auto i = 0u; - auto max_val = *first; //initialize with thirst element - - //---prologue - for (; i < size_prologue_loop; ++i) - { - if (comp(*first, max_val)) - { - max_val = *first; - } - first++; - } - - //---main simd loop - simd_type_T current_max_simd = splat(max_val); - for (; i < size_simd_loop; i += simd_size) - { - const simd_type_T element = load(first); - const simd_mask_T mask = comp(current_max_simd, element); - current_max_simd = blend(current_max_simd, element, mask); - first += simd_size; - } - //extract max from simdtype - for_each(current_max_simd, [&](T el) {if (comp(el, max_val)) { max_val = el; }}); - - //---epilogue - for (; i < size; ++i) - { - if (comp(*first, max_val)) - { - max_val 
= *first; - } - first++; - } - return max_val; + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + + if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::max() : std::numeric_limits::lowest(); //stolen from boost::simd + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto max_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + if (comp(*first, max_val)) + { + max_val = *first; } - - //Returns the value of the element with the largest value in the range[first, last[, - //The lowest possible value for the order if the range is empty. - template - T max(T const* first, T const* last) + first++; + } + + //---main simd loop + simd_type_T current_max_simd = splat(max_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element = load(first); + const simd_mask_T mask = comp(current_max_simd, element); + current_max_simd = blend(current_max_simd, element, mask); + first += simd_size; + } + //extract max from simdtype + for_each(current_max_simd, [&](T el) {if (comp(el, max_val)) { max_val = el; }}); + + //---epilogue + for (; i < size; ++i) + { + if (comp(*first, max_val)) { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("max - null ptr first."); - if (!last) - throw std::runtime_error("max - null ptr last."); -#endif - using simd_type_T = typename typetraits::simd_type; - - if (first == last) return std::numeric_limits::lowest(); - - //Define loop counter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce that 
input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - auto i = 0u; - auto max_val = *first; //initialize with thirst element - - //---prologue - for (; i < size_prologue_loop; ++i) - { - max_val = std::max(max_val, *first++); - } - //---main simd loop - simd_type_T current_max_simd = splat(max_val); - for (; i < size_simd_loop; i += simd_size) - { - const simd_type_T el = load(first); - current_max_simd = max(current_max_simd, el); - first += simd_size; - } - //extract max from simdtype - max_val = reduce_max(current_max_simd); - - //---epilogue - for (; i < size; ++i) - { - max_val = std::max(max_val, *first++); - } - - return max_val; + max_val = *first; } - - } // namespace SIMDPP_ARCH_NAMESPACE + first++; + } + return max_val; +} + +//Returns the value of the element with the largest value in the range[first, last[, +//The lowest possible value for the order if the range is empty. 
+template +T max(T const* first, T const* last) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("max - null ptr first."); + if (!last) + throw std::runtime_error("max - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + + if (first == last) return std::numeric_limits::lowest(); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto max_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + max_val = std::max(max_val, *first++); + } + //---main simd loop + simd_type_T current_max_simd = splat(max_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(first); + current_max_simd = max(current_max_simd, el); + first += simd_size; + } + //extract max from simdtype + max_val = reduce_max(current_max_simd); + + //---epilogue + for (; i < size; ++i) + { + max_val = std::max(max_val, *first++); + } + + return max_val; +} + +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_MAX_H diff --git a/simdpp/algorithm/max_element.h b/simdpp/algorithm/max_element.h index bb861a25..06e46f66 100644 --- a/simdpp/algorithm/max_element.h +++ b/simdpp/algorithm/max_element.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -18,24 +19,26 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include #include +#include +#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - template - T const * max_element(T const* first, T const* last, Compare comp) - { - if (first == last) return last; - return find(first, last, max(first, last, comp)); - } - - template - T const * max_element(T const* first, T const* last) - { - if (first == last) return last; - return find(first, last, max(first, last)); - } - - } // namespace SIMDPP_ARCH_NAMESPACE +namespace SIMDPP_ARCH_NAMESPACE { + +template +T const * max_element(T const* first, T const* last, Compare comp) +{ + if (first == last) return last; + return find(first, last, simdpp::max(first, last, comp)); +} + +template +T const * max_element(T const* first, T const* last) +{ + if (first == last) return last; + return find(first, last, simdpp::max(first, last)); +} + +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_MAX_ELEMENT_H diff --git a/simdpp/algorithm/min.h b/simdpp/algorithm/min.h index 18920b9b..9244d541 100644 --- a/simdpp/algorithm/min.h +++ b/simdpp/algorithm/min.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -20,122 +21,122 @@ Distributed under the Boost Software License, Version 1.0. #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - //Returns the value of the element with the smallest value in the range[first, last[ over comp, - //The largest possible value for the order if the range is empty. 
- template - T min(T const* first, T const* last, Comp comp) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("min - null ptr first."); - if (!last) - throw std::runtime_error("min - null ptr last."); +namespace SIMDPP_ARCH_NAMESPACE { + +//Returns the value of the element with the smallest value in the range[first, last[ over comp, +//The largest possible value for the order if the range is empty. +template +T min(T const* first, T const* last, Comp comp) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("min - null ptr first."); + if (!last) + throw std::runtime_error("min - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_mask_T = typename typetraits::simd_mask_type; - - if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::lowest() : std::numeric_limits::max(); //stolen from boost::simd - - //Define loop counter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - auto i = 0u; - auto min_val = *first; //initialize with thirst element - - //---prologue - for (; i < size_prologue_loop; ++i) - { - if (comp(min_val, *first)) - { - min_val = *first; - } - first++; - } - - //---main simd loop - simd_type_T current_min_simd = splat(min_val); - for (; i < size_simd_loop; i += simd_size) - { - const simd_type_T element = load(first); - const simd_mask_T mask = comp(element, current_min_simd); - current_min_simd = blend(current_min_simd, element, mask); - first += simd_size; - } - //extract min from simdtype - for_each(current_min_simd, [&](T el) {if (comp(min_val, el)) { min_val = el; }}); - - //---epilogue - for (; i < size; ++i) - { - if (comp(min_val, *first)) - { - 
min_val = *first; - } - first++; - } - return min_val; + using simd_type_T = typename simd_traits::simd_type; + using simd_mask_T = typename simd_traits::simd_mask_type; + + if (first == last) return comp(T(0), T(1)) ? std::numeric_limits::lowest() : std::numeric_limits::max(); //stolen from boost::simd + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto min_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + if (comp(min_val, *first)) + { + min_val = *first; } - - //Returns the value of the element with the smallest value in the range[first, last[, - //The largest possible value for the order if the range is empty. - template - T min(T const* first, T const* last) + first++; + } + + //---main simd loop + simd_type_T current_min_simd = splat(min_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T element = load(first); + const simd_mask_T mask = comp(element, current_min_simd); + current_min_simd = blend(current_min_simd, element, mask); + first += simd_size; + } + //extract min from simdtype + for_each(current_min_simd, [&](T el) {if (comp(min_val, el)) { min_val = el; }}); + + //---epilogue + for (; i < size; ++i) + { + if (comp(min_val, *first)) { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("min - null ptr first."); - if (!last) - throw std::runtime_error("min - null ptr last."); -#endif - using simd_type_T = typename typetraits::simd_type; - - if (first == last) return std::numeric_limits::max(); - - //Define loop counter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce 
that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - auto i = 0u; - auto min_val = *first; //initialize with thirst element - - //---prologue - for (; i < size_prologue_loop; ++i) - { - min_val = std::min(min_val, *first++); - } - - //---main simd loop - simd_type_T current_min_simd = splat(min_val); - for (; i < size_simd_loop; i += simd_size) - { - const simd_type_T el = load(first); - current_min_simd = min(el, current_min_simd); - first += simd_size; - } - //extract min from simdtype - min_val = reduce_min(current_min_simd); - - //---epilogue - for (; i < size; ++i) - { - min_val = std::min(min_val, *first++); - } - - return min_val; + min_val = *first; } - - } // namespace SIMDPP_ARCH_NAMESPACE + first++; + } + return min_val; +} + +//Returns the value of the element with the smallest value in the range[first, last[, +//The largest possible value for the order if the range is empty. 
+template +T min(T const* first, T const* last) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("min - null ptr first."); + if (!last) + throw std::runtime_error("min - null ptr last."); +#endif + using simd_type_T = typename simd_traits::simd_type; + + if (first == last) return std::numeric_limits::max(); + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + auto min_val = *first; //initialize with thirst element + + //---prologue + for (; i < size_prologue_loop; ++i) + { + min_val = std::min(min_val, *first++); + } + + //---main simd loop + simd_type_T current_min_simd = splat(min_val); + for (; i < size_simd_loop; i += simd_size) + { + const simd_type_T el = load(first); + current_min_simd = min(el, current_min_simd); + first += simd_size; + } + //extract min from simdtype + min_val = reduce_min(current_min_simd); + + //---epilogue + for (; i < size; ++i) + { + min_val = std::min(min_val, *first++); + } + + return min_val; +} + +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_MIN_H diff --git a/simdpp/algorithm/min_element.h b/simdpp/algorithm/min_element.h index c17ce343..b33a7df7 100644 --- a/simdpp/algorithm/min_element.h +++ b/simdpp/algorithm/min_element.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -18,25 +19,27 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include #include +#include +#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - template - T const * min_element(T const* first, T const* last, Compare comp) - { - if (first == last) return last; - return find(first, last, min(first, last, comp)); - } - - template - T const * min_element(T const* first, T const* last) - { - if (first == last) return last; - return find(first, last, min(first, last)); - } - - } // namespace SIMDPP_ARCH_NAMESPACE +namespace SIMDPP_ARCH_NAMESPACE { + +template +T const * min_element(T const* first, T const* last, Compare comp) +{ + if (first == last) return last; + return find(first, last, simdpp::min(first, last, comp)); +} + +template +T const * min_element(T const* first, T const* last) +{ + if (first == last) return last; + return find(first, last, simdpp::min(first, last)); +} + +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_MIN_ELEMENT_H diff --git a/simdpp/algorithm/none_of.h b/simdpp/algorithm/none_of.h index 66dacbcd..1adcb49d 100644 --- a/simdpp/algorithm/none_of.h +++ b/simdpp/algorithm/none_of.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -15,21 +16,21 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - template - bool none_of(T const* first, T const* last, UnaryPredicate pred) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("none_of - null ptr first."); - if (!last) - throw std::runtime_error("none_of - null ptr last."); +namespace SIMDPP_ARCH_NAMESPACE { + +template +bool none_of(T const* first, T const* last, UnaryPredicate pred) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("none_of - null ptr first."); + if (!last) + throw std::runtime_error("none_of - null ptr last."); #endif - return !any_of(first,last,pred); - } + return !any_of(first,last,pred); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_NONE_OF_H diff --git a/simdpp/algorithm/reduce.h b/simdpp/algorithm/reduce.h index d729800c..86523e79 100644 --- a/simdpp/algorithm/reduce.h +++ b/simdpp/algorithm/reduce.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -19,103 +20,104 @@ #include #include #include +#include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - template - T reduce(T const* first, T const* last, T init) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("reduce - null ptr first."); - if (!last) - throw std::runtime_error("reduce - null ptr last."); +namespace SIMDPP_ARCH_NAMESPACE { + +template +T reduce(T const* first, T const* last, T init) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("reduce - null ptr first."); + if (!last) + throw std::runtime_error("reduce - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - - simd_type_T accusimd = splat((T)0); - - //Define loop counter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - auto i = 0u; - //---prologue - for (; i < size_prologue_loop; ++i) - { - init += *first++; - } - //---main simd loop - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T element = load(first); - accusimd = accusimd + element; //TODO need += - first += simd_size; - } - - - //---epilogue - for (; i < size; ++i) - { - init += *first++; - } - - //sum simd residual - init += reduce_add(accusimd); - return init; - } - - template - T reduce(T const* first, T const* last, T init, T neutral, BinOp f) //need neutral element for simd part - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("reduce - null ptr first."); - if (!last) - throw std::runtime_error("reduce - null ptr last."); + using simd_type_T = typename simd_traits::simd_type; + + simd_type_T accusimd = splat((T)0); + + //Define loop counter + const 
auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + init += *first++; + } + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + accusimd = accusimd + element; //TODO need += + first += simd_size; + } + + + //---epilogue + for (; i < size; ++i) + { + init += *first++; + } + + //sum simd residual + init += reduce_add(accusimd); + return init; +} + +template +T reduce(T const* first, T const* last, T init, T neutral, BinOp f) //need neutral element for simd part +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("reduce - null ptr first."); + if (!last) + throw std::runtime_error("reduce - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - const auto alignment = typetraits::alignment; - - //Define loop counter - const auto size = std::distance(first, last); - const auto simd_size = simd_type_T::base_length; - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - auto i = 0u; - simd_type_T accusimd = splat(T(neutral)); //think about product sum - - //---prologue - for (; i < size_prologue_loop; ++i) - { - init = f(init, *first++); - } - //---main simd loop - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T element = load(first); - accusimd = f(accusimd, element); - first += simd_size; - } - //---epilogue - for (; i < size; ++i) - { - init = f(init, *first++); - } - - //reduce simd residual - for_each(accusimd, [&](T el) { init = f(init, el); 
}); - return init; - } - } // namespace SIMDPP_ARCH_NAMESPACE + using simd_type_T = typename simd_traits::simd_type; + const auto alignment = simd_traits::alignment; + + //Define loop counter + const auto size = std::distance(first, last); + const auto simd_size = simd_type_T::base_length; + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + simd_type_T accusimd = splat(T(neutral)); //think about product sum + + //---prologue + for (; i < size_prologue_loop; ++i) + { + init = f(init, *first++); + } + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + accusimd = f(accusimd, element); + first += simd_size; + } + //---epilogue + for (; i < size; ++i) + { + init = f(init, *first++); + } + + //reduce simd residual + for_each(accusimd, [&](T el) { init = f(init, el); }); + return init; +} +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_REDUCE_H diff --git a/simdpp/algorithm/replace.h b/simdpp/algorithm/replace.h index 53643446..951acabc 100644 --- a/simdpp/algorithm/replace.h +++ b/simdpp/algorithm/replace.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -15,27 +16,27 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { +namespace SIMDPP_ARCH_NAMESPACE { - template - void replace(T* first, T* last, T const & old_val, T const & new_val) - { - struct local_predicate - { - using simd_type_T = typename typetraits::simd_type; - local_predicate(const T & old_val, const T & new_val) : m_old_val(old_val), m_new_val(new_val),m_old_val_simd(splat(old_val)), m_new_val_simd(splat(new_val)) {} +template +void replace(T* first, T* last, T const & old_val, T const & new_val) +{ + struct local_predicate + { + using simd_type_T = typename simd_traits::simd_type; + local_predicate(const T & old_val, const T & new_val) : m_old_val(old_val), m_new_val(new_val),m_old_val_simd(splat(old_val)), m_new_val_simd(splat(new_val)) {} - T operator()( const T& a) const { return a == m_old_val ? m_new_val : a;} - simd_type_T operator()(const simd_type_T& a) const { return blend(m_new_val_simd,a,cmp_eq(a,m_old_val_simd)); } + T operator()( const T& a) const { return a == m_old_val ? m_new_val : a;} + simd_type_T operator()(const simd_type_T& a) const { return blend(m_new_val_simd,a,cmp_eq(a,m_old_val_simd)); } - T m_old_val, m_new_val; - simd_type_T m_old_val_simd, m_new_val_simd; - }; + T m_old_val, m_new_val; + simd_type_T m_old_val_simd, m_new_val_simd; + }; - transform(first, last, first, local_predicate(old_val, new_val)); - } + transform(first, last, first, local_predicate(old_val, new_val)); +} - } // namespace SIMDPP_ARCH_NAMESPACE +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_H diff --git a/simdpp/algorithm/replace_if.h b/simdpp/algorithm/replace_if.h index 435e006f..a592b63c 100644 --- a/simdpp/algorithm/replace_if.h +++ b/simdpp/algorithm/replace_if.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -15,27 +16,27 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - template - void replace_if(T* first, T* last, UnaryPredicate pred , const T& new_val) - { - struct local_predicate - { - using simd_type_T = typename typetraits::simd_type; - local_predicate(const UnaryPredicate& pred,const T & new_val) : m_new_val(new_val),m_new_val_simd(splat(new_val)),m_pred(pred) {} - - T operator()( const T& a) const { return m_pred(a) ? m_new_val : a;} - simd_type_T operator()(const simd_type_T& a) const { return blend(m_new_val_simd,a,m_pred(a)); } - - T m_new_val; - simd_type_T m_new_val_simd; - UnaryPredicate m_pred; - }; - - transform(first, last, first, local_predicate(pred, new_val)); - } - } // namespace SIMDPP_ARCH_NAMESPACE +namespace SIMDPP_ARCH_NAMESPACE { + +template +void replace_if(T* first, T* last, UnaryPredicate pred , const T& new_val) +{ + struct local_predicate + { + using simd_type_T = typename simd_traits::simd_type; + local_predicate(const UnaryPredicate& pred,const T & new_val) : m_new_val(new_val),m_new_val_simd(splat(new_val)),m_pred(pred) {} + + T operator()( const T& a) const { return m_pred(a) ? m_new_val : a;} + simd_type_T operator()(const simd_type_T& a) const { return blend(m_new_val_simd,a,m_pred(a)); } + + T m_new_val; + simd_type_T m_new_val_simd; + UnaryPredicate m_pred; + }; + + transform(first, last, first, local_predicate(pred, new_val)); +} +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_REPLACE_IF_H diff --git a/simdpp/algorithm/transform.h b/simdpp/algorithm/transform.h index 0b51ef30..945c0e21 100644 --- a/simdpp/algorithm/transform.h +++ b/simdpp/algorithm/transform.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -22,150 +23,150 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - template - U* transform(T const* first, T const* last, U* out, UnOp f) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("transform - null ptr first."); - if (!last) - throw std::runtime_error("transform - null ptr last."); - if (!out) - throw std::runtime_error("transform - null ptr out."); +namespace SIMDPP_ARCH_NAMESPACE { + +template +U* transform(T const* first, T const* last, U* out, UnOp f) +{ +#ifdef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("transform - null ptr first."); + if (!last) + throw std::runtime_error("transform - null ptr last."); + if (!out) + throw std::runtime_error("transform - null ptr out."); #endif - using simd_type_T = typename typetraits::simd_type; - using simd_type_U = typename typetraits::simd_type; + using simd_type_T = typename simd_traits::simd_type; + using simd_type_U = typename simd_traits::simd_type; - static_assert (simd_type_T::base_length == simd_type_U::base_length - , "mismatch base_length between T and U" - ); + static_assert (simd_type_T::base_length == simd_type_U::base_length + , "mismatch base_length between T and U" + ); - const auto alignment = typetraits::alignment; + const auto alignment = simd_traits::alignment; - //Define loop counter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const 
auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; - auto i = 0u; - - //---prologue - for (; i < size_prologue_loop; ++i) - { - *out++ = f(*first++); - } - //---main simd loop - if (detail::is_aligned(out, alignment)) - { - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T element = load(first); - store(out, f(element)); - first += simd_size; - out += simd_size; - } - } - else - { - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T element = load(first); - store_u(out, f(element)); - first += simd_size; - out += simd_size; - } - } - //---epilogue - for (; i < size; ++i) - { - *out++ = f(*first++); - } - return out; + auto i = 0u; + + //---prologue + for (; i < size_prologue_loop; ++i) + { + *out++ = f(*first++); + } + //---main simd loop + if (detail::is_aligned(out, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = load(first); + store(out, f(element)); + first += simd_size; + out += simd_size; } - template - U* transform(T1 const* first1, T1 const* last1, T2 const* first2, U* out, BinOp f) + } + else + { + for (; i < size_simd_loop; i += simd_size) { -#ifndef NDEBUG //precondition debug mode - if (!first1) - throw std::runtime_error("transform - null ptr first1."); - if (!last1) - throw std::runtime_error("transform - null ptr last1."); - if (!first2) - throw std::runtime_error("transform - null ptr first2."); - if (!out) - throw std::runtime_error("transform - null ptr out."); + simd_type_T element = load(first); + store_u(out, f(element)); + first += simd_size; + out += simd_size; + } + } + //---epilogue + for (; i < size; ++i) + { + *out++ = f(*first++); + } + return out; +} +template +U* transform(T1 const* first1, T1 const* last1, T2 const* first2, U* out, BinOp f) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("transform - null ptr first1."); + if (!last1) + throw std::runtime_error("transform - null ptr last1."); + if 
(!first2) + throw std::runtime_error("transform - null ptr first2."); + if (!out) + throw std::runtime_error("transform - null ptr out."); #endif - using simd_type_T1 = typename typetraits::simd_type; - using simd_type_T2 = typename typetraits::simd_type; - using simd_type_U = typename typetraits::simd_type; - - static_assert (simd_type_T1::base_length == simd_type_T2::base_length - , "mismatch base_length between T1 and T2" - ); - static_assert (simd_type_T1::base_length == simd_type_U::base_length - , "mismatch base_length between T1 and U" - ); - static_assert (simd_type_T2::base_length == simd_type_U::base_length - , "mismatch base_length between T2 and U" - ); - - auto alignment = typetraits::alignment; - - //Define loop counter - const auto simd_size = simd_type_T1::base_length; - const auto size = std::distance(first1, last1); - const auto range = helper_input_range(first1, last1); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - - auto i = 0u; - - //---prologue - for (; i < size_prologue_loop; ++i) - { - *out++ = f(*first1++, *first2++); - } - - //---main simd loop - if (detail::is_aligned(first2, alignment) && detail::is_aligned(out, alignment)) - { - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T1 element1 = load(first1); - simd_type_T2 element2 = load(first2); - store(out, f(element1, element2)); - first1 += simd_size; - first2 += simd_size; - out += simd_size; - } - } - else - { - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T1 element1 = load(first1); - simd_type_T2 element2 = load_u(first2); - store_u(out, f(element1, element2)); - first1 += simd_size; - first2 += simd_size; - out += simd_size; - } - } - //---epilogue - for (; i < size; ++i) - { - *out++ = f(*first1++, *first2++); - } - return out; + using simd_type_T1 = typename simd_traits::simd_type; + using simd_type_T2 = typename simd_traits::simd_type; + using simd_type_U = typename simd_traits::simd_type; + + 
static_assert (simd_type_T1::base_length == simd_type_T2::base_length + , "mismatch base_length between T1 and T2" + ); + static_assert (simd_type_T1::base_length == simd_type_U::base_length + , "mismatch base_length between T1 and U" + ); + static_assert (simd_type_T2::base_length == simd_type_U::base_length + , "mismatch base_length between T2 and U" + ); + + auto alignment = simd_traits::alignment; + + //Define loop counter + const auto simd_size = simd_type_T1::base_length; + const auto size = std::distance(first1, last1); + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + + auto i = 0u; + + //---prologue + for (; i < size_prologue_loop; ++i) + { + *out++ = f(*first1++, *first2++); + } + + //---main simd loop + if (detail::is_aligned(first2, alignment) && detail::is_aligned(out, alignment)) + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T1 element1 = load(first1); + simd_type_T2 element2 = load(first2); + store(out, f(element1, element2)); + first1 += simd_size; + first2 += simd_size; + out += simd_size; + } + } + else + { + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T1 element1 = load(first1); + simd_type_T2 element2 = load_u(first2); + store_u(out, f(element1, element2)); + first1 += simd_size; + first2 += simd_size; + out += simd_size; } - } // namespace SIMDPP_ARCH_NAMESPACE + } + //---epilogue + for (; i < size; ++i) + { + *out++ = f(*first1++, *first2++); + } + return out; +} +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif //LIBSIMDPP_SIMDPP_ALGORITHM_TRANSFORM_H diff --git a/simdpp/algorithm/transform_reduce.h b/simdpp/algorithm/transform_reduce.h index 96ac8ba9..8c314b38 100644 --- a/simdpp/algorithm/transform_reduce.h +++ b/simdpp/algorithm/transform_reduce.h @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, 
Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -20,131 +21,131 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - template - T transform_reduce( const T* first, const T* last, T init, BinaryOp binary_op,UnaryOp unary_op) - { -#ifndef NDEBUG //precondition debug mode - if (!first) - throw std::runtime_error("transform_reduce - null ptr first."); - if (!last) - throw std::runtime_error("transform_reduce - null ptr last."); +namespace SIMDPP_ARCH_NAMESPACE { +template +T transform_reduce( const T* first, const T* last, T init, BinaryOp binary_op,UnaryOp unary_op) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first) + throw std::runtime_error("transform_reduce - null ptr first."); + if (!last) + throw std::runtime_error("transform_reduce - null ptr last."); #endif - using simd_type_T = typename typetraits::simd_type; - - //Define loop counter - const auto simd_size = simd_type_T::base_length; - const auto size = std::distance(first, last); - //note enforce that input is aligned when we start the main simd loop - const auto range = helper_input_range(first, last); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - auto i = 0u; - //---prologue - for (; i < size_prologue_loop; ++i) - { - init = binary_op(init,unary_op(*first++)); - } - - simd_type_T accusimd=splat((T)0); - //---main simd loop - for (; i < size_simd_loop; i += simd_size) - { - simd_type_T element = unary_op(load(first)); - accusimd = binary_op(accusimd,element); - first += simd_size; - } - //reduce simd residual - for_each(accusimd, [&](T el) { init = binary_op(init, el); }); + using simd_type_T = typename simd_traits::simd_type; + + //Define loop counter + const auto simd_size = simd_type_T::base_length; + const auto size = std::distance(first, last); + //note enforce that input is aligned when we start the main simd loop + const auto range = helper_input_range(first, last); + const auto size_prologue_loop = 
range.first; + const auto size_simd_loop = range.second; + + auto i = 0u; + //---prologue + for (; i < size_prologue_loop; ++i) + { + init = binary_op(init,unary_op(*first++)); + } + + simd_type_T accusimd=splat((T)0); + //---main simd loop + for (; i < size_simd_loop; i += simd_size) + { + simd_type_T element = unary_op(load(first)); + accusimd = binary_op(accusimd,element); + first += simd_size; + } + //reduce simd residual + for_each(accusimd, [&](T el) { init = binary_op(init, el); }); - //---epilogue - for (; i < size; ++i) - { - init = binary_op(init,unary_op(*first++)); - } - return init; + //---epilogue + for (; i < size; ++i) + { + init = binary_op(init,unary_op(*first++)); + } + return init; - } - - template - U transform_reduce(const T1* first1, const T1* last1, const T2* first2, U init, BinaryOp1 binary_op1, BinaryOp2 binary_op2) - { -#ifndef NDEBUG //precondition debug mode - if (!first1) - throw std::runtime_error("transform_reduce - null ptr first1."); - if (!last1) - throw std::runtime_error("transform_reduce - null ptr last1."); - if (!first2) - throw std::runtime_error("transform_reduce - null ptr first2."); +} + +template +U transform_reduce(const T1* first1, const T1* last1, const T2* first2, U init, BinaryOp1 binary_op1, BinaryOp2 binary_op2) +{ +#ifndef SIMDPP_DEBUG //precondition debug mode + if (!first1) + throw std::runtime_error("transform_reduce - null ptr first1."); + if (!last1) + throw std::runtime_error("transform_reduce - null ptr last1."); + if (!first2) + throw std::runtime_error("transform_reduce - null ptr first2."); #endif - using simd_type_T1 = typename typetraits::simd_type; - using simd_type_T2 = typename typetraits::simd_type; - using simd_type_U = typename typetraits::simd_type; - - static_assert (simd_type_T1::base_length == simd_type_T2::base_length - , "mismatch base_length between T1 and T2" - ); - static_assert (simd_type_T1::base_length == simd_type_U::base_length - , "mismatch base_length between T1 and U" - ); - 
static_assert (simd_type_T2::base_length == simd_type_U::base_length - , "mismatch base_length between T2 and U" - ); - - auto alignment = typetraits::alignment; - - //Define loop counter - const auto simd_size = simd_type_T1::base_length; - const auto size = std::distance(first1, last1); - const auto range = helper_input_range(first1, last1); - const auto size_prologue_loop = range.first; - const auto size_simd_loop = range.second; - - - auto i = 0u; - //---Prologue - for(;i::simd_type; + using simd_type_T2 = typename simd_traits::simd_type; + using simd_type_U = typename simd_traits::simd_type; + + static_assert (simd_type_T1::base_length == simd_type_T2::base_length + , "mismatch base_length between T1 and T2" + ); + static_assert (simd_type_T1::base_length == simd_type_U::base_length + , "mismatch base_length between T1 and U" + ); + static_assert (simd_type_T2::base_length == simd_type_U::base_length + , "mismatch base_length between T2 and U" + ); + + auto alignment = simd_traits::alignment; + + //Define loop counter + const auto simd_size = simd_type_T1::base_length; + const auto size = std::distance(first1, last1); + const auto range = helper_input_range(first1, last1); + const auto size_prologue_loop = range.first; + const auto size_simd_loop = range.second; + + + auto i = 0u; + //---Prologue + for(;i +SIMDPP_INL T* reach_next_aligned(T* ptr, std::size_t alignment) noexcept { - assert(((A & (A - 1)) == 0)); - return (void*)(((std::size_t)ptr + A - 1) &~(A - 1)); //from boost\align\detail\align_up.hpp + assert(((alignment & (alignment - 1)) == 0)); + return reinterpret_cast(((std::size_t)ptr + alignment - 1) &~(alignment - 1)); //from boost\align\detail\align_up.hpp } } // namespace detail diff --git a/simdpp/dispatch/get_arch_string_list.h b/simdpp/dispatch/get_arch_string_list.h index e4872085..dbe76937 100644 --- a/simdpp/dispatch/get_arch_string_list.h +++ b/simdpp/dispatch/get_arch_string_list.h @@ -87,7 +87,7 @@ inline Arch get_arch_string_list(const 
char* const strings[], int count, const c auto prefixlen = std::strlen(prefix); for (auto i = 0; i < count; ++i) { const char* s = *strings++; - auto len = std::strlen(s); + size_t len = std::strlen(s); // check if s matches prefix if (len < prefixlen) diff --git a/simdpp/simd.h b/simdpp/simd.h index d1b86234..afda9846 100644 --- a/simdpp/simd.h +++ b/simdpp/simd.h @@ -196,31 +196,6 @@ #include #include -//algorithm -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - /** @def SIMDPP_NO_DISPATCHER Disables internal dispatching functionality. If the internal dispathcher mechanism is not needed, the user can define the @c SIMDPP_NO_DISPATCHER. diff --git a/simdpp/types/traits.h b/simdpp/types/traits.h index a0507995..0c93aa60 100644 --- a/simdpp/types/traits.h +++ b/simdpp/types/traits.h @@ -1,8 +1,8 @@ /* Copyright (C) 2012 Povilas Kanapickas - Distributed under the Boost Software License, Version 1.0. - (See accompanying file LICENSE_1_0.txt or copy at - http://www.boost.org/LICENSE_1_0.txt) +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) */ #ifndef LIBSIMDPP_SIMDPP_TYPES_TRAITS_H @@ -16,167 +16,161 @@ #include namespace simdpp { - namespace SIMDPP_ARCH_NAMESPACE { - - /// Allows detection whether specific type is a simdpp vector - template - struct is_vector : std::false_type {}; - - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - template struct is_vector> : std::true_type {}; - - /// Allows detection whether specific type is a simdpp value (i.e. 
not expression) vector - template - struct is_value_vector : std::false_type {}; - - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - template struct is_value_vector> : std::true_type {}; - - /// Allows detection whether specific type is a simdpp mask - template - struct is_mask : std::false_type {}; - - template struct is_mask> : std::true_type {}; - template struct is_mask> : std::true_type {}; - template struct is_mask> : std::true_type {}; - template struct is_mask> : std::true_type {}; - template struct is_mask> : std::true_type {}; - template struct is_mask> : std::true_type {}; - - - /// Define typetraits - template - struct typetraits - { - static const size_t alignment = std::alignment_of::value; - }; - - /// typetraits int8_t - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_INT8_SIZE; - using simd_type = int8; - using simd_mask_type = mask_int8; - static const size_t alignment = fast_size; - }; - /// typetraits uint8_t - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_INT8_SIZE; - using simd_type = uint8; - using simd_mask_type = mask_int8; - static const size_t alignment = fast_size; - }; - - /// typetraits 
int16_t - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_INT16_SIZE; - using simd_type = int16; - using simd_mask_type = mask_int16; - static const size_t alignment = fast_size * 2; - }; - /// typetraits uint16_t - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_INT16_SIZE; - using simd_type = uint16; - using simd_mask_type = mask_int16; - static const size_t alignment = fast_size * 2; - }; - - /// typetraits int32_t - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_INT32_SIZE; - using simd_type = int32; - using simd_mask_type = mask_int32; - static const size_t alignment = fast_size * 4; - }; - /// typetraits uint32_t - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_INT32_SIZE; - using simd_type = uint32; - using simd_mask_type = mask_int32; - static const size_t alignment = fast_size * 4; - }; - - /// typetraits int64_t - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_INT64_SIZE; - using simd_type = int64; - using simd_mask_type = mask_int64; - static const size_t alignment = fast_size * 8; - }; - - /// typetraits uint64_t - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_INT64_SIZE; - using simd_type = uint64; - using simd_mask_type = mask_int64; - static const size_t alignment = fast_size * 8; - }; - - /// typetraits float32 - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_FLOAT32_SIZE; - using simd_type = float32; - using simd_mask_type = mask_float32; - static const size_t alignment = fast_size * 4; - }; - - /// typetraits float64 - template<> - struct typetraits - { - static const size_t fast_size = SIMDPP_FAST_FLOAT64_SIZE; - using simd_type = float64; - using simd_mask_type = mask_float64; - static const size_t alignment = fast_size * 8; - }; - - } // namespace SIMDPP_ARCH_NAMESPACE +namespace SIMDPP_ARCH_NAMESPACE { + +/// 
Allows detection whether specific type is a simdpp vector +template +struct is_vector : std::false_type {}; + +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; +template struct is_vector> : std::true_type {}; + +/// Allows detection whether specific type is a simdpp value (i.e. not expression) vector +template +struct is_value_vector : std::false_type {}; + +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; +template struct is_value_vector> : std::true_type {}; + +/// Allows detection whether specific type is a simdpp mask +template 
+struct is_mask : std::false_type {}; + +template struct is_mask> : std::true_type {}; +template struct is_mask> : std::true_type {}; +template struct is_mask> : std::true_type {}; +template struct is_mask> : std::true_type {}; +template struct is_mask> : std::true_type {}; +template struct is_mask> : std::true_type {}; + + +/// Define simd_traits +template +struct simd_traits +{ + static const size_t alignment = std::alignment_of::value; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_INT8_SIZE; + using simd_type = int8; + using simd_mask_type = mask_int8; + static const size_t alignment = fast_size; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_INT8_SIZE; + using simd_type = uint8; + using simd_mask_type = mask_int8; + static const size_t alignment = fast_size; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_INT16_SIZE; + using simd_type = int16; + using simd_mask_type = mask_int16; + static const size_t alignment = fast_size * 2; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_INT16_SIZE; + using simd_type = uint16; + using simd_mask_type = mask_int16; + static const size_t alignment = fast_size * 2; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_INT32_SIZE; + using simd_type = int32; + using simd_mask_type = mask_int32; + static const size_t alignment = fast_size * 4; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_INT32_SIZE; + using simd_type = uint32; + using simd_mask_type = mask_int32; + static const size_t alignment = fast_size * 4; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_INT64_SIZE; + using simd_type = int64; + using simd_mask_type = mask_int64; + static const size_t alignment = fast_size * 8; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = 
SIMDPP_FAST_INT64_SIZE; + using simd_type = uint64; + using simd_mask_type = mask_int64; + static const size_t alignment = fast_size * 8; +}; + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_FLOAT32_SIZE; + using simd_type = float32; + using simd_mask_type = mask_float32; + static const size_t alignment = fast_size * 4; +}; + + +template<> +struct simd_traits +{ + static const size_t fast_size = SIMDPP_FAST_FLOAT64_SIZE; + using simd_type = float64; + using simd_mask_type = mask_float64; + static const size_t alignment = fast_size * 8; +}; + +} // namespace SIMDPP_ARCH_NAMESPACE } // namespace simdpp #endif diff --git a/test/insn/all_of.cc b/test/insn/all_of.cc index 11040fe4..0d639346 100644 --- a/test/insn/all_of.cc +++ b/test/insn/all_of.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,77 +11,123 @@ #include #include #include +//algorithm +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct UnaryPredicateEqualValue - { - public: - UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template + struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } - T m_val; - simd_type_T m_val_simd; - }; + T m_val; + simd_type_T m_val_simd; +}; + + 
+template +struct AllOffFuzzingTest +{ + AllOffFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(10) {} + void operator()(TestReporter& tr) + { + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + const auto predEqualFive = UnaryPredicateEqualValue((T)5); + for (auto size : m_sizes) + { + {//aligned input/ouput predicate match + const auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std=std::all_of(cbegin(input), cend(input), predEqualTen); + auto res_simd=simdpp::all_of(input.data(), input.data() + input.size(),predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput predicate match + const auto input(DataGenerator>(size, m_generator)); + auto res_std = std::all_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + + {//aligned input/ouput predicate fail + const auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::all_of(cbegin(input), cend(input), predEqualFive); + auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualFive); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput predicate fail + const auto input(DataGenerator>(size, m_generator)); + auto res_std = std::all_of(cbegin(input), cend(input), predEqualFive); + auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualFive); + TEST_EQUAL(tr, res_std, res_simd); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; - template - void test_all_of_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; +template +void test_all_of_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; - {//test with 
predicate - const auto predEqualTen = UnaryPredicateEqualValue((T)10); - const auto predEqualFive = UnaryPredicateEqualValue((T)5); - { //test prologue - vector_t ivect = { (T)10,(T)10 }; - auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); - auto resstd = std::all_of(begin(ivect), end(ivect), predEqualTen); - TEST_EQUAL(tr, res, resstd); - } - { //test prologue - vector_t ivect = { (T)10,(T)10 }; - auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualFive); - auto resstd = std::all_of(begin(ivect), end(ivect), predEqualFive); - TEST_EQUAL(tr, res, resstd); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50, (T)10); - auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); - auto resstd = std::all_of(begin(ivect), end(ivect), predEqualTen); - TEST_EQUAL(tr, res, resstd); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50, (T)10); - auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualFive); - auto resstd = std::all_of(begin(ivect), end(ivect), predEqualFive); - TEST_EQUAL(tr, res, resstd); - } - } + {//test with predicate + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + const auto predEqualFive = UnaryPredicateEqualValue((T)5); + { //test prologue + vector_t ivect = { (T)10,(T)10 }; + auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::all_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + { //test prologue + vector_t ivect = { (T)10,(T)10 }; + auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualFive); + auto resstd = std::all_of(begin(ivect), end(ivect), predEqualFive); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)10); + auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::all_of(begin(ivect), 
end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)10); + auto res = all_of(ivect.data(), ivect.data() + ivect.size(), predEqualFive); + auto resstd = std::all_of(begin(ivect), end(ivect), predEqualFive); + TEST_EQUAL(tr, res, resstd); + } + AllOffFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); + } - } +} - void test_all_of(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("all_of"); - //test_all_of_type(ts, tr); //FIXME - //test_all_of_type(ts, tr); //FIXME - test_all_of_type(ts, tr); - test_all_of_type(ts, tr); - test_all_of_type(ts, tr); - test_all_of_type(ts, tr); - test_all_of_type(ts, tr); - test_all_of_type(ts, tr); - test_all_of_type(ts, tr); - test_all_of_type(ts, tr); - } +void test_all_of(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("all_of"); + //test_all_of_type(ts, tr); //FIXME + //test_all_of_type(ts, tr); //FIXME + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); + test_all_of_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/any_of.cc b/test/insn/any_of.cc index de7f2092..dae18923 100644 --- a/test/insn/any_of.cc +++ b/test/insn/any_of.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -10,64 +11,111 @@ #include #include #include +//algorithm +#include +#include /* srand, rand */ +#include /* time */ namespace SIMDPP_ARCH_NAMESPACE { - template - struct UnaryPredicateEqualValue - { - public: - UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template + struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } - T m_val; - simd_type_T m_val_simd; - }; + T m_val; + simd_type_T m_val_simd; +}; - - template - void test_any_of_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - - {//test with predicate - const auto predEqualTen = UnaryPredicateEqualValue((T)10); - { //test prologue - vector_t ivect = { (T)1,(T)10 }; - auto res = any_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); - auto resstd = std::any_of(begin(ivect), end(ivect), predEqualTen); - TEST_EQUAL(tr, res, resstd); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50, (T)5); - auto res = any_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); - auto resstd = std::any_of(begin(ivect), end(ivect), predEqualTen); - TEST_EQUAL(tr, res, resstd); - } - } +template +struct AnyOffFuzzingTest +{ + AnyOffFuzzingTest(std::initializer_list sizes = {}) 
:m_sizes(sizes), m_generator(5) {} + void operator()(TestReporter& tr) + { + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + srand((unsigned int)time(nullptr)); + for (auto size : m_sizes) + { + {//aligned input/ouput predicate match + auto input(DataGeneratorAligned>(size, m_generator)); + input[(size_t)(rand() % input.size())]=10; + auto res_std = std::any_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput predicate match + auto input(DataGenerator>(size, m_generator)); + input[(size_t)(rand() % input.size())] = 10; + auto res_std = std::any_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + + {//aligned input/ouput predicate fail + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::any_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput predicate fail + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::any_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; + +template + void test_any_of_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; - } + {//test with predicate + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + { //test prologue + vector_t ivect = { (T)1,(T)10 }; + auto res = any_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = 
std::any_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)5); + auto res = any_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::any_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + } + AnyOffFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} - void test_any_of(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("any_of"); - //test_any_of_type(ts, tr); //FIXME - //test_any_of_type(ts, tr); //FIXME - test_any_of_type(ts, tr); - test_any_of_type(ts, tr); - test_any_of_type(ts, tr); - test_any_of_type(ts, tr); - test_any_of_type(ts, tr); - test_any_of_type(ts, tr); - test_any_of_type(ts, tr); - test_any_of_type(ts, tr); - } +void test_any_of(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("any_of"); + //test_any_of_type(ts, tr); //FIXME + //test_any_of_type(ts, tr); //FIXME + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); + test_any_of_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/copy.cc b/test/insn/copy.cc index f11cd630..88970dc4 100644 --- a/test/insn/copy.cc +++ b/test/insn/copy.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,70 +11,99 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include #include - +//algorithm +#include +#include namespace SIMDPP_ARCH_NAMESPACE { - template - void test_copy_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect = { (T)42,(T)42 }; - vector_t ovect = { (T)0,(T)0 }; - copy(ivect.data(), ivect.data() + ivect.size(), ovect.data()); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ivect[i], ovect[i]); +template +struct CopyFuzzingTest +{ + CopyFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + std::vector::alignment>> expected(size); + std::vector::alignment>> output(size); + std::copy(cbegin(input),cend(input),begin(expected)); + simdpp::copy(input.data(), input.data()+input.size(), output.data()); + TEST_EQUAL_COLLECTIONS(tr,output,expected); } - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(150, (T)42); - vector_t ovect(150, (T)0); - - copy(ivect.data(), ivect.data() + ivect.size(), ovect.data()); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ovect[i], ivect[i]); + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + std::vector expected(size); + std::vector output(size); + std::copy(cbegin(input), cend(input), begin(expected)); + simdpp::copy(input.data(), input.data() + input.size(), output.data()); + TEST_EQUAL_COLLECTIONS(tr, output, expected); } } - { //test main loop and epilogue on range - vector_aligned_t ivect(150, (T)42); - vector_t ovect(150, (T)0); + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; - copy(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, ovect.data()+10u); - for (auto i = 0u; i < 10u; 
++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ovect[i], (T)0); - } - for (auto i = 10; i < ovect.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ovect[i], ivect[i]); - } - for (auto i = ovect.size() - 10u; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ovect[i], (T)0); - } - } +template +void test_copy_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; + + copy(ivect.data(), ivect.data() + ivect.size(), ovect.data()); + TEST_EQUAL_COLLECTIONS(tr, ivect, ovect); } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, (T)42); + vector_t ovect(150, (T)0); - void test_copy(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("copy"); - test_copy_type(ts, tr); - test_copy_type(ts, tr); - test_copy_type(ts, tr); - test_copy_type(ts, tr); - test_copy_type(ts, tr); - test_copy_type(ts, tr); - test_copy_type(ts, tr); - test_copy_type(ts, tr); - test_copy_type(ts, tr); - test_copy_type(ts, tr); + copy(ivect.data(), ivect.data() + ivect.size(), ovect.data()); + TEST_EQUAL_COLLECTIONS(tr, ivect, ovect); } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, (T)42); + vector_t ovect(150, (T)0); + + copy(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, ovect.data()+10u); + for (auto i = 0u; i < 10u; ++i) + { + TEST_EQUAL(tr, ovect[i], (T)0); + } + for (auto i = 10; i < ovect.size() - 10u; ++i) + { + TEST_EQUAL(tr, ovect[i], ivect[i]); + } + for (auto i = ovect.size() - 10u; i < ovect.size(); ++i) + { + TEST_EQUAL(tr, ovect[i], (T)0); + } + } + CopyFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_copy(TestResults& res, TestReporter& tr) +{ + using 
namespace simdpp; + TestResultsSet& ts = res.new_results_set("copy"); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); + test_copy_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/copy_n.cc b/test/insn/copy_n.cc index 343ee352..7cd905d8 100644 --- a/test/insn/copy_n.cc +++ b/test/insn/copy_n.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,65 +11,65 @@ Distributed under the Boost Software License, Version 1.0. #include #include #include - +//algorithm +#include namespace SIMDPP_ARCH_NAMESPACE { - template - void test_copy_n_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect = { (T)42,(T)42 }; - vector_t ovect = { (T)0,(T)0 }; - copy_n(ivect.data(),2, ovect.data()); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ivect[i], ovect[i]); - } - } - { //test negative don't change ovect - vector_t ivect = { (T)42,(T)42 }; - vector_t ovect = { (T)0,(T)0 }; +template +void test_copy_n_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; - copy_n(ivect.data(), -2, ovect.data()); + copy_n(ivect.data(),2, ovect.data()); + TEST_EQUAL_COLLECTIONS(tr, ivect, ovect); + } + { //test negative don't change ovect + vector_t ivect = { (T)42,(T)42 }; + vector_t ovect = { (T)0,(T)0 }; - TEST_EQUAL(tr, (T)0, 
ovect[0]); - TEST_EQUAL(tr, (T)0, ovect[1]); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(150, (T)42); - vector_t ovect(150, (T)0); + copy_n(ivect.data(), -2, ovect.data()); - copy_n(ivect.data(),100, ovect.data()); - for (auto i = 0; i < 100; ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ovect[i], ivect[i]); - } - for (auto i = 100; i (ts, tr); - test_copy_n_type(ts, tr); - test_copy_n_type(ts, tr); - test_copy_n_type(ts, tr); - test_copy_n_type(ts, tr); - test_copy_n_type(ts, tr); - test_copy_n_type(ts, tr); - test_copy_n_type(ts, tr); - test_copy_n_type(ts, tr); - test_copy_n_type(ts, tr); + copy_n(ivect.data(),100, ovect.data()); + for (auto i = 0; i < 100; ++i) + { + TEST_EQUAL(tr, ovect[i], ivect[i]); + } + for (auto i = 100; i (ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); + test_copy_n_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/count.cc b/test/insn/count.cc index 9ea7261a..7cba5304 100644 --- a/test/insn/count.cc +++ b/test/insn/count.cc @@ -1,47 +1,89 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + +Distributed under the Boost Software License, Version 1.0. 
+(See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +*/ + #include "../utils/test_helpers.h" #include "../utils/test_results.h" #include #include #include +//algorithm +#include namespace SIMDPP_ARCH_NAMESPACE { - template - void test_count_type(TestResultsSet& ts, TestReporter& tr) + +template +struct CountFuzzingTest +{ + CountFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(42) {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect = { (T)42,(T)42 }; - auto res = count(ivect.data(), ivect.data() + ivect.size(), (T)42); - auto resstd = std::count(begin(ivect), end(ivect), (T)42); - TEST_EQUAL(tr, res, resstd); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50, (T)42); - ivect[25] = (T)0; - ivect[49] = (T)0; - auto res = count(ivect.data(), ivect.data() + ivect.size(), (T)42); - auto resstd = std::count(begin(ivect), end(ivect), (T)42); - TEST_EQUAL(tr, res, resstd); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + input[(input.size()-1)%2]=(T)0; + auto res_std=std::count(cbegin(input), cend(input),(T)42); + auto res_simd=simdpp::count(input.data(), input.data() + input.size(), (T)42); + TEST_EQUAL(tr, res_std, res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + input[(input.size()-1) % 2] = (T)0; + auto res_std = std::count(cbegin(input), cend(input), (T)42); + auto res_simd = simdpp::count(input.data(), input.data() + input.size(), (T)42); + TEST_EQUAL(tr, res_std, res_simd); + + } } } + std::vector m_sizes; + GeneratorConstant m_generator; +}; - void test_count(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("count"); - test_count_type(ts, tr); - test_count_type(ts, 
tr); - test_count_type(ts, tr); - test_count_type(ts, tr); - test_count_type(ts, tr); - test_count_type(ts, tr); - test_count_type(ts, tr); - test_count_type(ts, tr); - test_count_type(ts, tr); - test_count_type(ts, tr); +template +void test_count_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + auto res = count(ivect.data(), ivect.data() + ivect.size(), (T)42); + auto resstd = std::count(begin(ivect), end(ivect), (T)42); + TEST_EQUAL(tr, res, resstd); } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)42); + ivect[25] = (T)0; + ivect[49] = (T)0; + auto res = count(ivect.data(), ivect.data() + ivect.size(), (T)42); + auto resstd = std::count(begin(ivect), end(ivect), (T)42); + TEST_EQUAL(tr, res, resstd); + } + CountFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_count(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("count"); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); + test_count_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/count_if.cc b/test/insn/count_if.cc index 8b310b8c..4b5f8437 100644 --- a/test/insn/count_if.cc +++ b/test/insn/count_if.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -11,61 +12,95 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include #include +//algorithm +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct UnaryPredicateEqualValue - { - public: - UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template +struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } - T m_val; - simd_type_T m_val_simd; - }; + T m_val; + simd_type_T m_val_simd; +}; - template - void test_count_if_type(TestResultsSet& ts, TestReporter& tr) +template +struct CountIfFuzzingTest +{ + CountIfFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(42) {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = std::vector; const auto pred = UnaryPredicateEqualValue((T)42); - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect = { (T)42,(T)42 }; - auto res = count_if(ivect.data(), ivect.data() + ivect.size(), pred); - auto resstd = std::count_if(begin(ivect), end(ivect), pred); - TEST_EQUAL(tr, res, resstd); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50, (T)42); - ivect[25] = (T)0; - ivect[49] = (T)0; - auto res = count_if(ivect.data(), ivect.data() + ivect.size(), pred); - auto resstd = std::count_if(begin(ivect), end(ivect), pred); - TEST_EQUAL(tr, res, resstd); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto 
input(DataGeneratorAligned>(size, m_generator)); + input[(input.size() - 1) % 2] = (T)0; + auto res_std = std::count_if(cbegin(input), cend(input),pred); + auto res_simd = simdpp::count_if(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, res_std, res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + input[(input.size() - 1) % 2] = (T)0; + auto res_std = std::count_if(cbegin(input), cend(input), pred); + auto res_simd = simdpp::count_if(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, res_std, res_simd); + + } } } + std::vector m_sizes; + GeneratorConstant m_generator; +}; - void test_count_if(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("count_if"); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); - test_count_if_type(ts, tr); +template +void test_count_if_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + const auto pred = UnaryPredicateEqualValue((T)42); + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + auto res = count_if(ivect.data(), ivect.data() + ivect.size(), pred); + auto resstd = std::count_if(begin(ivect), end(ivect), pred); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)42); + ivect[25] = (T)0; + ivect[49] = (T)0; + auto res = count_if(ivect.data(), ivect.data() + ivect.size(), pred); + auto resstd = std::count_if(begin(ivect), end(ivect), pred); + TEST_EQUAL(tr, res, resstd); } + CountIfFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void 
test_count_if(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("count_if"); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); + test_count_if_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/equal.cc b/test/insn/equal.cc index 5944c653..eb440c32 100644 --- a/test/insn/equal.cc +++ b/test/insn/equal.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,50 +11,82 @@ Distributed under the Boost Software License, Version 1.0. #include #include #include - +//algorithm +#include namespace SIMDPP_ARCH_NAMESPACE { - template - void test_equal_type(TestResultsSet& ts, TestReporter& tr) +template +struct EqualFuzzingTest +{ + EqualFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(0) {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect1 = { (T)42,(T)42 }; - vector_t ivect2 = { (T)0,(T)0 }; - - auto res=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); - TEST_EQUAL(tr,res,false); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect1(50); - std::iota(begin(ivect1),end(ivect1),(T)1); - vector_aligned_t ivect2(50); - std::copy(begin(ivect1),end(ivect1),begin(ivect2)); - auto res=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); - TEST_EQUAL(tr,res,true); - ivect2[25]=0; - auto res2=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); - 
TEST_EQUAL(tr,res2,false); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto input2(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::equal(cbegin(input), cend(input), cbegin(input2)); + auto res_simd = simdpp::equal(input.data(), input.data() + input.size(), input2.data()); + TEST_EQUAL(tr, res_std, res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto input2(DataGenerator>(size, m_generator)); + auto res_std = std::equal(cbegin(input), cend(input), cbegin(input2)); + auto res_simd = simdpp::equal(input.data(), input.data() + input.size(), input2.data()); + TEST_EQUAL(tr, res_std, res_simd); + + } } } + std::vector m_sizes; + GeneratorIota m_generator; +}; - void test_equal(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("copy"); - // test_equal_type(ts, tr); //FIXME - // test_equal_type(ts, tr); //FIXME - test_equal_type(ts, tr); - test_equal_type(ts, tr); - test_equal_type(ts, tr); - test_equal_type(ts, tr); - test_equal_type(ts, tr); - test_equal_type(ts, tr); - test_equal_type(ts, tr); - test_equal_type(ts, tr); +template +void test_equal_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect1 = { (T)42,(T)42 }; + vector_t ivect2 = { (T)0,(T)0 }; + + auto res=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res,false); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect1(50); + std::iota(begin(ivect1),end(ivect1),(T)1); + vector_aligned_t ivect2(50); + std::copy(begin(ivect1),end(ivect1),begin(ivect2)); + auto res=equal(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res,true); + ivect2[25]=0; + auto res2=equal(ivect1.data(), ivect1.data() + 
ivect1.size(), ivect2.data()); + TEST_EQUAL(tr,res2,false); } + EqualFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_equal(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("copy"); + // test_equal_type(ts, tr); //FIXME + // test_equal_type(ts, tr); //FIXME + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); + test_equal_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/fill.cc b/test/insn/fill.cc index 1796a85f..a69c27a7 100644 --- a/test/insn/fill.cc +++ b/test/insn/fill.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,62 +11,86 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include #include - +#include namespace SIMDPP_ARCH_NAMESPACE { - template - void test_fill_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect = { (T)0,(T)1 }; - vector_t expected = { (T)42,(T)42 }; - fill(ivect.data(), ivect.data() + ivect.size(), (T)42); - for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ivect[i]); +template +struct FillFuzzingTest +{ + FillFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(42) {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + {//aligned input/ouput + auto expected(DataGeneratorAligned>(size, m_generator)); + std::vector::alignment>> output(size); + simdpp::fill(output.data(), output.data() + output.size(),(T)42); + TEST_EQUAL_COLLECTIONS(tr, output, expected); } - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(150, (T)0); - vector_t expected(150, (T)42); - - fill(ivect.data(), ivect.data() + ivect.size(), (T)42); - for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ivect[i]); + {//unaligned input/ouput + auto expected(DataGenerator>(size, m_generator)); + std::vector output(size); + simdpp::fill(output.data(), output.data() + output.size(), (T)42); + TEST_EQUAL_COLLECTIONS(tr, output, expected); } } - { //test main loop and epilogue on range - vector_aligned_t ivect(150, (T)0); - vector_t expected(150, (T)42); + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; - fill(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, (T)42); - for (auto i = 10; i < expected.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ivect[i]); - } - } + +template +void test_fill_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + 
using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)0,(T)1 }; + vector_t expected = { (T)42,(T)42 }; + + fill(ivect.data(), ivect.data() + ivect.size(), (T)42); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, (T)0); + vector_t expected(150, (T)42); - void test_fill(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("fill"); - test_fill_type(ts, tr); - test_fill_type(ts, tr); - test_fill_type(ts, tr); - test_fill_type(ts, tr); - test_fill_type(ts, tr); - test_fill_type(ts, tr); - test_fill_type(ts, tr); - test_fill_type(ts, tr); - test_fill_type(ts, tr); - test_fill_type(ts, tr); + fill(ivect.data(), ivect.data() + ivect.size(), (T)42); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, (T)0); + vector_t expected(150, (T)42); + + fill(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, (T)42); + for (auto i = 10; i < expected.size() - 10u; ++i) + { + TEST_EQUAL(tr, expected[i], ivect[i]); + } + } + FillFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_fill(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("fill"); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); + test_fill_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/find.cc b/test/insn/find.cc index 8759f558..7dc8c7ad 100644 --- a/test/insn/find.cc +++ b/test/insn/find.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz 
Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,50 +11,82 @@ Distributed under the Boost Software License, Version 1.0. #include #include #include +#include + namespace SIMDPP_ARCH_NAMESPACE { - template - void test_find_type(TestResultsSet& ts, TestReporter& tr) + +template +struct FindFuzzingTest +{ + FindFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(0) {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_aligned_t = std::vector::alignment>>; - {//test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), T(1)); - auto resstd = std::find(begin(ivect), end(ivect),(T)3); - auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)3); - TEST_EQUAL(tr, *resstd, *res); - } - { //test main loop and epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), T(1)); - auto resstd = std::find(begin(ivect), end(ivect),(T)98); - auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)98); - TEST_EQUAL(tr, *resstd, *res); - } - { //test main loop - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), T(1)); - auto resstd = std::find(begin(ivect), end(ivect),(T)50); - auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)50); - TEST_EQUAL(tr, *resstd, *res); + for (auto size : m_sizes) + { + const auto val = ((size - 1) / 2)+1; + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::find(cbegin(input), cend(input), val); + auto res_simd = simdpp::find(input.data(), input.data() + input.size(),val); + TEST_EQUAL(tr, *res_std, *res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::find(cbegin(input), cend(input), val); + auto res_simd = simdpp::find(input.data(), input.data() + input.size(), val); + TEST_EQUAL(tr, *res_std, *res_simd); + } } } + std::vector m_sizes; + 
GeneratorIota m_generator; +}; - void test_find(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("find"); - test_find_type(ts, tr); - test_find_type(ts, tr); - // test_find_type(ts, tr); //FIXME - // test_find_type(ts, tr); //FIXME - test_find_type(ts, tr); - test_find_type(ts, tr); - test_find_type(ts, tr); - test_find_type(ts, tr); - test_find_type(ts, tr); - test_find_type(ts, tr); +template +void test_find_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)3); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)3); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)98); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)98); + TEST_EQUAL(tr, *resstd, *res); } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + auto resstd = std::find(begin(ivect), end(ivect),(T)50); + auto res = find(ivect.data(), ivect.data() + ivect.size(), (T)50); + TEST_EQUAL(tr, *resstd, *res); + } + FindFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_find(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("find"); + test_find_type(ts, tr); + test_find_type(ts, tr); + // test_find_type(ts, tr); //FIXME + // test_find_type(ts, tr); //FIXME + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); + test_find_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git 
a/test/insn/find_if.cc b/test/insn/find_if.cc index 8cc834b4..9d090309 100644 --- a/test/insn/find_if.cc +++ b/test/insn/find_if.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,69 +11,101 @@ Distributed under the Boost Software License, Version 1.0. #include #include #include +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct UnaryPredicateSupValue - { - public: - UnaryPredicateSupValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template +struct UnaryPredicateSupValue +{ +public: + UnaryPredicateSupValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a > m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_gt(a, m_val_simd); } + bool operator()(T a) const { return a > m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_gt(a, m_val_simd); } - T m_val; - simd_type_T m_val_simd; - }; + T m_val; + simd_type_T m_val_simd; +}; - template - void test_find_if_type(TestResultsSet& ts, TestReporter& tr) +template +struct FindIfFuzzingTest +{ + FindIfFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(0) {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_aligned_t = std::vector::alignment>>; - {//test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), T(1)); - const auto SupThree = UnaryPredicateSupValue((T)3); - auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 3; }); - auto res = find_if(ivect.data(), ivect.data() + ivect.size(), SupThree); 
- TEST_EQUAL(tr, *resstd, *res); - } - { //test main loop and epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), T(1)); - const auto predicate = UnaryPredicateSupValue((T)98); - auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 98; }); - auto res = find_if(ivect.data(), ivect.data() + ivect.size(), predicate); - TEST_EQUAL(tr, *resstd, *res); - } - { //test main loop - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), T(1)); - const auto predicate = UnaryPredicateSupValue((T)50); - auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 50; }); - auto res = find_if(ivect.data(), ivect.data() + ivect.size(), predicate); - TEST_EQUAL(tr, *resstd, *res); + + for (auto size : m_sizes) + { + const auto pred = UnaryPredicateSupValue((T)((size - 1) / 2)); + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::find_if(cbegin(input), cend(input), pred); + auto res_simd = simdpp::find_if(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, *res_std, *res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::find_if(cbegin(input), cend(input), pred); + auto res_simd = simdpp::find_if(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, *res_std, *res_simd); + + } } } + std::vector m_sizes; + GeneratorIota m_generator; +}; - void test_find_if(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("find_if"); - test_find_if_type(ts, tr); - test_find_if_type(ts, tr); - //test_find_if_type(ts, tr); //FIXME - //test_find_if_type(ts, tr); //FIXME - test_find_if_type(ts, tr); - test_find_if_type(ts, tr); - test_find_if_type(ts, tr); - test_find_if_type(ts, tr); - test_find_if_type(ts, tr); - test_find_if_type(ts, tr); +template +void test_find_if_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace 
simdpp; + using vector_aligned_t = std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + const auto SupThree = UnaryPredicateSupValue((T)3); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 3; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), SupThree); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateSupValue((T)98); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 98; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateSupValue((T)50); + auto resstd = std::find_if(begin(ivect), end(ivect), [](T el) { return el > 50; }); + auto res = find_if(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + FindIfFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_find_if(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("find_if"); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + //test_find_if_type(ts, tr); //FIXME + //test_find_if_type(ts, tr); //FIXME + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); + test_find_if_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/find_if_not.cc b/test/insn/find_if_not.cc index 3a44274f..6fe7a088 100644 --- a/test/insn/find_if_not.cc +++ b/test/insn/find_if_not.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz 
Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,69 +11,101 @@ Distributed under the Boost Software License, Version 1.0. #include #include #include +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct UnaryPredicateInfValue - { - public: - UnaryPredicateInfValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template +struct UnaryPredicateInfValue +{ +public: + UnaryPredicateInfValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a < m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_lt(a, m_val_simd); } - private: - T m_val; - simd_type_T m_val_simd; - }; + bool operator()(T a) const { return a < m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_lt(a, m_val_simd); } +private: + T m_val; + simd_type_T m_val_simd; +}; - template - void test_find_if_not_type(TestResultsSet& ts, TestReporter& tr) +template +struct FindIfNotFuzzingTest +{ + FindIfNotFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator(0) {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_aligned_t = std::vector::alignment>>; - {//test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), T(1)); - const auto SupThree = UnaryPredicateInfValue((T)3); - auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 3; }); - auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), SupThree); - TEST_EQUAL(tr, *resstd, *res); - } - { //test main loop and epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), T(1)); - const auto predicate = 
UnaryPredicateInfValue((T)98); - auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 98; }); - auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), predicate); - TEST_EQUAL(tr, *resstd, *res); - } - { //test main loop - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), T(1)); - const auto predicate = UnaryPredicateInfValue((T)50); - auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 50; }); - auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), predicate); - TEST_EQUAL(tr, *resstd, *res); + + for (auto size : m_sizes) + { + const auto pred = UnaryPredicateInfValue((T)((size - 1) / 2)); + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::find_if_not(cbegin(input), cend(input), pred); + auto res_simd = simdpp::find_if_not(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, *res_std, *res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::find_if_not(cbegin(input), cend(input), pred); + auto res_simd = simdpp::find_if_not(input.data(), input.data() + input.size(), pred); + TEST_EQUAL(tr, *res_std, *res_simd); + + } } } + std::vector m_sizes; + GeneratorIota m_generator; +}; - void test_find_if_not(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("find_if_not"); - //test_find_if_not_type(ts, tr); //FIXME missing reduce and - //test_find_if_not_type(ts, tr); //FIXME missing reduce and - //test_find_if_not_type(ts, tr); //FIXME - //test_find_if_not_type(ts, tr); //FIXME - test_find_if_not_type(ts, tr); - test_find_if_not_type(ts, tr); - test_find_if_not_type(ts, tr); - test_find_if_not_type(ts, tr); - test_find_if_not_type(ts, tr); - test_find_if_not_type(ts, tr); +template +void test_find_if_not_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_aligned_t 
= std::vector::alignment>>; + {//test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), T(1)); + const auto InfThree = UnaryPredicateInfValue((T)3); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 3; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), InfThree); + TEST_EQUAL(tr, *resstd, *res); + } + { //test main loop and epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateInfValue((T)98); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 98; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); } + { //test main loop + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), T(1)); + const auto predicate = UnaryPredicateInfValue((T)50); + auto resstd = std::find_if_not(begin(ivect), end(ivect), [](T el) { return el < 50; }); + auto res = find_if_not(ivect.data(), ivect.data() + ivect.size(), predicate); + TEST_EQUAL(tr, *resstd, *res); + } + FindIfNotFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_find_if_not(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("find_if_not"); + //test_find_if_not_type(ts, tr); //FIXME missing reduce and + //test_find_if_not_type(ts, tr); //FIXME missing reduce and + //test_find_if_not_type(ts, tr); //FIXME + //test_find_if_not_type(ts, tr); //FIXME + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); + test_find_if_not_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file diff --git a/test/insn/lexicographical_compare.cc b/test/insn/lexicographical_compare.cc index 12d10759..dd7e76cf 100644 --- 
a/test/insn/lexicographical_compare.cc +++ b/test/insn/lexicographical_compare.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -12,53 +13,87 @@ #include #include #include +#include namespace SIMDPP_ARCH_NAMESPACE { +template +struct LexicographicalCompareFuzzingTest +{ + LexicographicalCompareFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator1(0), m_generator2(1) {} + void operator()(TestReporter& tr) + { - template - void test_lexicograpical_compare_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_aligned_t ivect = {(T)0,(T)1}; - vector_aligned_t ivect2={(T)1,(T)2}; - auto res = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect2.data(),ivect2.data()+ivect2.size()); - auto resstd = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect2), end(ivect2)); - TEST_EQUAL(tr, res, resstd); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50); - vector_aligned_t ivect2(50); - std::iota(begin(ivect),end(ivect),(T)0); - std::iota(begin(ivect2),end(ivect2),(T)1); - auto res = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect2.data(),ivect2.data()+ivect2.size()); - auto resstd = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect2), end(ivect2)); - TEST_EQUAL(tr, res, resstd); - auto resinv = lexicographical_compare(ivect2.data(),ivect2.data()+ivect2.size(),ivect.data(),ivect.data()+ivect.size()); - auto resstdinv = std::lexicographical_compare(begin(ivect2), end(ivect2),begin(ivect), end(ivect)); - TEST_EQUAL(tr, resinv, resstdinv); - auto ressame = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect.data(),ivect.data()+ivect.size()); - auto resstdsame = 
std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect), end(ivect)); - TEST_EQUAL(tr, ressame, resstdsame); - } - } + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator1)); + auto input2(DataGeneratorAligned>(size, m_generator2)); + auto res_std = std::lexicographical_compare(cbegin(input), cend(input), cbegin(input2), cend(input2)); + auto res_simd = simdpp::lexicographical_compare(input.data(), input.data() + input.size(), input2.data(), input2.data() + input2.size() ); + TEST_EQUAL(tr, res_std, res_simd); + } + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator1)); + auto input2(DataGenerator>(size, m_generator2)); + auto res_std = std::lexicographical_compare(cbegin(input), cend(input), cbegin(input2), cend(input2)); + auto res_simd = simdpp::lexicographical_compare(input.data(), input.data() + input.size(), input2.data(), input2.data() + input2.size()); + TEST_EQUAL(tr, res_std, res_simd); - void test_lexicographical_compare(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("lexicographical_compare"); - //test_lexicograpical_compare_type(ts, tr); //FIXME - //test_lexicograpical_compare_type(ts, tr); //FIXME - //test_lexicograpical_compare_type(ts, tr); //FIXME - //test_lexicograpical_compare_type(ts, tr); //FIXME - test_lexicograpical_compare_type(ts, tr); - test_lexicograpical_compare_type(ts, tr); - test_lexicograpical_compare_type(ts, tr); - test_lexicograpical_compare_type(ts, tr); - test_lexicograpical_compare_type(ts, tr); - test_lexicograpical_compare_type(ts, tr); - } - } // namespace SIMDPP_ARCH_NAMESPACE + } + } + } + std::vector m_sizes; + GeneratorIota m_generator1; + GeneratorIota m_generator2; +}; + +template + void test_lexicograpical_compare_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + 
vector_aligned_t ivect = {(T)0,(T)1}; + vector_aligned_t ivect2={(T)1,(T)2}; + auto res = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect2.data(),ivect2.data()+ivect2.size()); + auto resstd = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect2), end(ivect2)); + TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + vector_aligned_t ivect2(50); + std::iota(begin(ivect),end(ivect),(T)0); + std::iota(begin(ivect2),end(ivect2),(T)1); + auto res = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect2.data(),ivect2.data()+ivect2.size()); + auto resstd = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect2), end(ivect2)); + TEST_EQUAL(tr, res, resstd); + auto resinv = lexicographical_compare(ivect2.data(),ivect2.data()+ivect2.size(),ivect.data(),ivect.data()+ivect.size()); + auto resstdinv = std::lexicographical_compare(begin(ivect2), end(ivect2),begin(ivect), end(ivect)); + TEST_EQUAL(tr, resinv, resstdinv); + auto ressame = lexicographical_compare(ivect.data(),ivect.data()+ivect.size(),ivect.data(),ivect.data()+ivect.size()); + auto resstdsame = std::lexicographical_compare(begin(ivect), end(ivect),begin(ivect), end(ivect)); + TEST_EQUAL(tr, ressame, resstdsame); + } + LexicographicalCompareFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_lexicographical_compare(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("lexicographical_compare"); + //test_lexicograpical_compare_type(ts, tr); //FIXME + //test_lexicograpical_compare_type(ts, tr); //FIXME + //test_lexicograpical_compare_type(ts, tr); //FIXME + //test_lexicograpical_compare_type(ts, tr); //FIXME + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); + 
test_lexicograpical_compare_type(ts, tr); + test_lexicograpical_compare_type(ts, tr); +} + +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/max.cc b/test/insn/max.cc index 6bd7344c..29c2c9bd 100644 --- a/test/insn/max.cc +++ b/test/insn/max.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -11,95 +12,143 @@ Distributed under the Boost Software License, Version 1.0. #include #include #include - +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct binary_cmp_greater - { - public: - binary_cmp_greater() = default; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template +struct binary_cmp_greater +{ +public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - SIMDPP_INL bool operator()(T a, T b) { return a > b; } + SIMDPP_INL bool operator()(T a, T b) { return a > b; } - SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } - }; + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } +}; - template - void test_max_type(TestResultsSet& ts, TestReporter& tr) +template +typename Container::value_type ExtractMaxFromContainer(const Container& cont) +{ + typedef typename Container::value_type value_type; + value_type current = std::numeric_limits::lowest(); + auto it=cbegin(cont),itend=cend(cont); + for (; it != itend; ++it) + { + current=std::max(current,*it); + } + return current; +} +template +struct MaxFuzzingTest +{ + MaxFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = 
std::vector; - using vector_aligned_t = std::vector::alignment>>; auto cmpOPGreater = binary_cmp_greater(); - {//test classical max - { //test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[0] = { (T)127 }; - TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res=simdpp::max(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, res, ExtractMaxFromContainer(input)); } - { //test epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[99] = { (T)127 }; - TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res = simdpp::max(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, res, ExtractMaxFromContainer(input)); } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[50] = { (T)127 }; - TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + {//aligned input/ouput + predicate + auto input(DataGeneratorAligned>(size, m_generator)); + auto res = simdpp::max(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, res, ExtractMaxFromContainer(input)); } - {// test first==last - vector_aligned_t ivect(5); - TEST_EQUAL(tr, std::numeric_limits::lowest(), max(ivect.data() + ivect.size(), ivect.data() + ivect.size())); + {//unaligned input/ouput + predicate + auto input(DataGenerator>(size, m_generator)); + auto res = simdpp::max(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, res, ExtractMaxFromContainer(input)); } } - {//test max with comp op - { //test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[0] = { (T)127 }; - TEST_EQUAL(tr, (T)127, 
max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); - } - { //test epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[99] = { (T)127 }; - TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[50] = { (T)127 }; - TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); - } - {// test first==last - vector_aligned_t ivect(5); - TEST_EQUAL(tr, std::numeric_limits::lowest(), max(ivect.data() + ivect.size(), ivect.data() + ivect.size(), cmpOPGreater)); - } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; +template +void test_max_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size())); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::lowest(), max(ivect.data() + ivect.size(), ivect.data() + ivect.size())); } - } - - void test_max(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("max"); - test_max_type(ts, tr); - test_max_type(ts, 
tr); - //test_max_type(ts, tr); //FIXME - //test_max_type(ts, tr); //FIXME - test_max_type(ts, tr); - test_max_type(ts, tr); - test_max_type(ts, tr); - test_max_type(ts, tr); - test_max_type(ts, tr); - test_max_type(ts, tr); + {//test max with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, max(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::lowest(), max(ivect.data() + ivect.size(), ivect.data() + ivect.size(), cmpOPGreater)); + } } + MaxFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144});//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_max(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("max"); + + test_max_type(ts, tr); + test_max_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); + test_max_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/max_element.cc b/test/insn/max_element.cc index ec444c33..2c4e3c83 100644 --- a/test/insn/max_element.cc +++ b/test/insn/max_element.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -11,87 +12,129 @@ Distributed under the Boost Software License, Version 1.0. #include #include #include - +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct binary_cmp_greater - { - public: - binary_cmp_greater() = default; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template +struct binary_cmp_greater +{ +public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a, T b) { return a > b; } + bool operator()(T a, T b) { return a < b; } - simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } - }; + simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_lt(a, b); } +}; - template - void test_max_element_type(TestResultsSet& ts, TestReporter& tr) +template +struct MaxElementFuzzingTest +{ + MaxElementFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; auto cmpOPGreater = binary_cmp_greater(); - {//test classical max - { //test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[0] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); - } - { //test epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[99] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[50] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() 
+ ivect.size())); + + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::max_element(cbegin(input), cend(input)); + auto res_simd = simdpp::max_element(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, *res_std,*res_simd); } - } - {//test max with comp op - { //test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[0] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::max_element(cbegin(input), cend(input)); + auto res_simd = simdpp::max_element(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, *res_std, *res_simd); + } - { //test epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[99] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + {//aligned input/ouput + predicate + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::max_element(cbegin(input), cend(input), cmpOPGreater); + auto res_simd = simdpp::max_element(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, *res_std, *res_simd); } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[50] = { (T)127 }; - TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + {//unaligned input/ouput + predicate + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::max_element(cbegin(input), cend(input), cmpOPGreater); + auto res_simd = simdpp::max_element(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, *res_std, *res_simd); } } - } + std::vector m_sizes; + GeneratorRandom m_generator; +}; - void 
test_max_element(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("max_element"); - test_max_element_type(ts, tr); - test_max_element_type(ts, tr); - //test_max_type(ts, tr); //FIXME - //test_max_type(ts, tr); //FIXME - test_max_element_type(ts, tr); - test_max_element_type(ts, tr); - test_max_element_type(ts, tr); - test_max_element_type(ts, tr); - test_max_element_type(ts, tr); - test_max_element_type(ts, tr); +template +void test_max_element_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size())); + } + } + {//test max with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + 
ivect[50] = { (T)127 }; + TEST_EQUAL(tr, (T)127, *max_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } } + MaxElementFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144});//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_max_element(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("max_element"); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); + test_max_element_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/min.cc b/test/insn/min.cc index 9eef179e..e7fe43b5 100644 --- a/test/insn/min.cc +++ b/test/insn/min.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -11,94 +12,147 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include #include +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct binary_cmp_greater - { - public: - binary_cmp_greater() = default; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template +struct binary_cmp_greater +{ +public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - SIMDPP_INL bool operator()(T a, T b) { return a > b; } + SIMDPP_INL bool operator()(T a, T b) { return a > b; } - SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } - }; + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } +}; - template - void test_min_type(TestResultsSet& ts, TestReporter& tr) +template +typename Container::value_type ExtractMinFromContainer(const Container& cont) +{ + typedef typename Container::value_type value_type; + value_type current = std::numeric_limits::max(); + auto it = cbegin(cont), itend = cend(cont); + for (; it != itend; ++it) + { + current = std::min(current, *it); + } + return current; +} + +template +struct MinFuzzingTest +{ + MinFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; auto cmpOPGreater = binary_cmp_greater(); - {//test classical min - { //test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[0] = { (T)0 }; - TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res = simdpp::min(input.data(), input.data() 
+ input.size()); + TEST_EQUAL(tr, res, ExtractMinFromContainer(input)); } - { //test epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[99] = { (T)0 }; - TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res = simdpp::min(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, res, ExtractMinFromContainer(input)); } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[50] = { (T)0 }; - TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + {//aligned input/ouput + predicate + auto input(DataGeneratorAligned>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res = simdpp::min(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, res, ExtractMinFromContainer(input)); } - {// test first==last - vector_aligned_t ivect(5); - TEST_EQUAL(tr, std::numeric_limits::max(), min(ivect.data() + ivect.size(), ivect.data() + ivect.size())); + {//unaligned input/ouput + predicate + auto input(DataGenerator>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res = simdpp::min(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, res, ExtractMinFromContainer(input)); } } - {//test min with comp op - { //test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[0] = { (T)0 }; - TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); - } - { //test epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[99] = { (T)0 }; - TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(100); 
- std::iota(begin(ivect), end(ivect), (T)1); - ivect[50] = { (T)0 }; - TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); - } - {// test first==last - vector_aligned_t ivect(5); - TEST_EQUAL(tr, std::numeric_limits::max(), min(ivect.data() + ivect.size(), ivect.data() + ivect.size(), cmpOPGreater)); - } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; +template +void test_min_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical min + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size())); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::max(), min(ivect.data() + ivect.size(), ivect.data() + ivect.size())); } - } - - void test_min(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("min"); - test_min_type(ts, tr); - test_min_type(ts, tr); - //test_min_type(ts, tr); //FIXME - //test_min_type(ts, tr); //FIXME - test_min_type(ts, tr); - test_min_type(ts, tr); - test_min_type(ts, tr); - test_min_type(ts, tr); - test_min_type(ts, tr); - test_min_type(ts, tr); + {//test min with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, 
(T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, min(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + {// test first==last + vector_aligned_t ivect(5); + TEST_EQUAL(tr, std::numeric_limits::max(), min(ivect.data() + ivect.size(), ivect.data() + ivect.size(), cmpOPGreater)); + } } + MinFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_min(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("min"); + test_min_type(ts, tr); + test_min_type(ts, tr); + //test_min_type(ts, tr); //FIXME + //test_min_type(ts, tr); //FIXME + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); + test_min_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/min_element.cc b/test/insn/min_element.cc index abebc019..c7d83843 100644 --- a/test/insn/min_element.cc +++ b/test/insn/min_element.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -11,87 +12,133 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include #include - +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct binary_cmp_greater - { - public: - binary_cmp_greater() = default; - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template +struct binary_cmp_greater +{ +public: + binary_cmp_greater() = default; + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - SIMDPP_INL bool operator()(T a, T b) { return a > b; } + SIMDPP_INL bool operator()(T a, T b) { return a < b; } - SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } - }; + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_lt(a, b); } +}; - template - void test_min_element_type(TestResultsSet& ts, TestReporter& tr) +template +struct MinElementFuzzingTest +{ + MinElementFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; auto cmpOPGreater = binary_cmp_greater(); - {//test classical max - { //test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[0] = { (T)0 }; - TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); - } - { //test epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[99] = { (T)0 }; - TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res_std = std::min_element(cbegin(input), cend(input)); + auto res_simd = simdpp::min_element(input.data(), input.data() + input.size()); + 
TEST_EQUAL(tr, *res_std, *res_simd); } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[50] = { (T)0 }; - TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); - } - } - {//test max with comp op - { //test prologue - vector_aligned_t ivect(5); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[0] = { (T)0 }; - TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + {//unaligned input/ouput + auto input(DataGenerator>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res_std = std::min_element(cbegin(input), cend(input)); + auto res_simd = simdpp::min_element(input.data(), input.data() + input.size()); + TEST_EQUAL(tr, *res_std, *res_simd); + } - { //test epilogue - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[99] = { (T)0 }; - TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + {//aligned input/ouput + predicate + auto input(DataGeneratorAligned>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res_std = std::min_element(cbegin(input), cend(input), cmpOPGreater); + auto res_simd = simdpp::min_element(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, *res_std, *res_simd); + } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(100); - std::iota(begin(ivect), end(ivect), (T)1); - ivect[50] = { (T)0 }; - TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + {//unaligned input/ouput + predicate + auto input(DataGenerator>(size, m_generator)); + std::reverse(std::begin(input), std::end(input)); + auto res_std = std::min_element(cbegin(input), cend(input), cmpOPGreater); + auto res_simd = simdpp::min_element(input.data(), input.data() + input.size(), cmpOPGreater); + TEST_EQUAL(tr, *res_std, 
*res_simd); } } - } + std::vector m_sizes; + GeneratorRandom m_generator; +}; - void test_min_element(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("min_element"); - test_min_element_type(ts, tr); - test_min_element_type(ts, tr); - //test_max_type(ts, tr); //FIXME - //test_max_type(ts, tr); //FIXME - test_min_element_type(ts, tr); - test_min_element_type(ts, tr); - test_min_element_type(ts, tr); - test_min_element_type(ts, tr); - test_min_element_type(ts, tr); - test_min_element_type(ts, tr); +template +void test_min_element_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto cmpOPGreater = binary_cmp_greater(); + {//test classical max + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size())); + } + } + {//test max with comp op + { //test prologue + vector_aligned_t ivect(5); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[0] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test epilogue + vector_aligned_t ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[99] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t 
ivect(100); + std::iota(begin(ivect), end(ivect), (T)1); + ivect[50] = { (T)0 }; + TEST_EQUAL(tr, (T)0, *min_element(ivect.data(), ivect.data() + ivect.size(), cmpOPGreater)); + } } + MinElementFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_min_element(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("min_element"); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + //test_max_type(ts, tr); //FIXME + //test_max_type(ts, tr); //FIXME + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); + test_min_element_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/none_of.cc b/test/insn/none_of.cc index b878a61e..e33c06c6 100644 --- a/test/insn/none_of.cc +++ b/test/insn/none_of.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -10,65 +11,111 @@ #include #include #include +#include +#include /* srand, rand */ +#include /* time */ namespace SIMDPP_ARCH_NAMESPACE { - template - struct UnaryPredicateEqualValue - { - public: - UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template + struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } - T m_val; - simd_type_T m_val_simd; - }; + T m_val; + simd_type_T m_val_simd; +}; - - template - void test_none_of_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - - {//test with predicate - const auto predEqualTen = UnaryPredicateEqualValue((T)10); - { //test prologue - vector_t ivect = { (T)1,(T)2}; - auto res = none_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); - auto resstd = std::none_of(begin(ivect), end(ivect), predEqualTen); - TEST_EQUAL(tr, res, resstd); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50, (T)5); - ivect[49]=(T)10; - auto res = none_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); - auto resstd = std::none_of(begin(ivect), end(ivect), predEqualTen); - TEST_EQUAL(tr, res, resstd); - } - } +template +struct NoneOffFuzzingTest +{ + NoneOffFuzzingTest(std::initializer_list sizes = {}) 
:m_sizes(sizes), m_generator(5) {} + void operator()(TestReporter& tr) + { + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + srand((unsigned int)time(nullptr)); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + input[(size_t)(rand() % input.size())] = 10; + auto res_std = std::none_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput + auto input(DataGenerator>(size, m_generator)); + input[(size_t)(rand() % input.size())] = 10; + auto res_std = std::none_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::none_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + {//non aligned input/ouput + auto input(DataGenerator>(size, m_generator)); + auto res_std = std::none_of(cbegin(input), cend(input), predEqualTen); + auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); + TEST_EQUAL(tr, res_std, res_simd); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generator; +}; + +template + void test_none_of_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; - } + {//test with predicate + const auto predEqualTen = UnaryPredicateEqualValue((T)10); + { //test prologue + vector_t ivect = { (T)1,(T)2}; + auto res = none_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::none_of(begin(ivect), end(ivect), predEqualTen); + 
TEST_EQUAL(tr, res, resstd); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50, (T)5); + ivect[49]=(T)10; + auto res = none_of(ivect.data(), ivect.data() + ivect.size(), predEqualTen); + auto resstd = std::none_of(begin(ivect), end(ivect), predEqualTen); + TEST_EQUAL(tr, res, resstd); + } + } + NoneOffFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} - void test_none_of(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("none_of"); - //test_none_of_type(ts, tr); //FIXME - //test_none_of_type(ts, tr); //FIXME - test_none_of_type(ts, tr); - test_none_of_type(ts, tr); - test_none_of_type(ts, tr); - test_none_of_type(ts, tr); - test_none_of_type(ts, tr); - test_none_of_type(ts, tr); - test_none_of_type(ts, tr); - test_none_of_type(ts, tr); - } +void test_none_of(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("none_of"); + //test_none_of_type(ts, tr); //FIXME + //test_none_of_type(ts, tr); //FIXME + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); + test_none_of_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/reduce.cc b/test/insn/reduce.cc index 8778bc17..e3b48ad6 100644 --- a/test/insn/reduce.cc +++ b/test/insn/reduce.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas +Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,126 +11,179 @@ Distributed under the Boost Software License, Version 1.0. 
#include #include #include +#include +#include namespace SIMDPP_ARCH_NAMESPACE { - template< typename T> - struct BinaryOpMul + +template< typename T> +struct BinaryOpPlus +{ +public: + BinaryOpPlus() {} + SIMDPP_INL T operator()(T const &a0, T const &a1) const noexcept { - public: - BinaryOpMul() {} - SIMDPP_INL T operator()(T const &a0, T const &a1) const noexcept - { - return a0 * a1; - } + return a0 + a1; + } - template - SIMDPP_INL U operator()(U const &a0, U const &a1) const noexcept - { - return a0*a1; - } - }; + template + SIMDPP_INL U operator()(U const &a0, U const &a1) const noexcept + { + return a0 + a1; + } +}; + +//from https://stackoverflow.com/questions/17333/what-is-the-most-effective-way-for-float-and-double-comparison +template +bool approximatelyEqual(T a, T b, T epsilon) +{ + return fabs(a - b) <= ((fabs(a) < fabs(b) ? fabs(b) : fabs(a)) * epsilon); +} - template - void test_reduce_type(TestResultsSet& ts, TestReporter& tr) +template +struct ReduceFuzzingTest +{ + ReduceFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect = { 0,41 }; - T expected = {42}; - T init = { 1 }; - T res=reduce(ivect.data(), ivect.data() + ivect.size(),init); - TEST_EQUAL(tr, expected,res); - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(42, 1); - T expected = { 42 }; - T init = { 0 }; - T res = reduce(ivect.data(), ivect.data() + ivect.size(), init); - TEST_EQUAL(tr, expected, res); - } - { //test main loop and epilogue on range - vector_aligned_t ivect(150, 1); - T expected = { 42 }; - T init = { 0 }; - T res = reduce(ivect.data()+8u, ivect.data() + ivect.size()-100u, init); - TEST_EQUAL(tr, expected, res); + T init = (T)0; + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, 
m_generator)); + auto res_std = std::accumulate(cbegin(input), cend(input), init); + //auto res_std=std::reduce(cbegin(input), cend(input),init); + auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + auto res_std = std::accumulate(cbegin(input), cend(input), init); + //auto res_std = std::reduce(cbegin(input), cend(input), init); + auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } } } - - template - void test_reducebinop_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - auto opMul = BinaryOpMul(); + std::vector m_sizes; + GeneratorRandom m_generator; +}; - { //test prologue - vector_t ivect = { 1,42 }; - T expected = { 42 }; - T init = { 1 }; - T neutral = { 1 }; - T res = reduce(ivect.data(), ivect.data() + ivect.size(), init,neutral,opMul); - TEST_EQUAL(tr, expected, res); - } - { //test main loop and epilogue on unaligned vector - vector_t ivect(150, 1); - T expected = { 42 }; - T init = { 42 }; - T neutral = { 1 }; - T res = reduce(ivect.data(), ivect.data() + ivect.size(), init, neutral, opMul); - TEST_EQUAL(tr, expected, res); - } - { //test main loop and epilogue on aligned vector check mul - vector_aligned_t ivect(10); - std::iota(begin(ivect), end(ivect), (T)1.); - T expected = { 3628800 };//aka 10! 
- T init = { 1 }; - T neutral = { 1 }; - T res = reduce(ivect.data(), ivect.data() + ivect.size(), init, neutral, opMul); - TEST_EQUAL(tr, expected, res); - } - { //test main loop and epilogue on aligned vector on range check mul - vector_aligned_t ivect(10); - std::iota(begin(ivect), end(ivect), (T)1.); - T expected = { 840}; //4*5*6*7 - T init = { 1 }; - T neutral = { 1 }; - T res = reduce(ivect.data() + 3, ivect.data() + ivect.size() - 3, init, neutral, opMul); - TEST_EQUAL(tr, expected, res); +template +struct ReduceBinaryFuzzingTest +{ + ReduceBinaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + T init = (T)0; + auto opPlus = BinaryOpPlus(); + T neutral = (T)0; + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + //auto res_std = std::reduce(cbegin(input), cend(input),init, opPlus); + auto res_std = std::accumulate(cbegin(input), cend(input), init, opPlus); + auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init, neutral, opPlus); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + //auto res_std = std::reduce(cbegin(input), cend(input), init, opPlus); + auto res_std = std::accumulate(cbegin(input), cend(input), init, opPlus); + auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init, neutral, opPlus); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } } } + std::vector m_sizes; + GeneratorRandom m_generator; +}; - void test_reduce(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("reduce"); - test_reduce_type(ts, tr); - test_reduce_type(ts, tr); - test_reduce_type(ts, tr); - test_reduce_type(ts, tr); - test_reduce_type(ts, 
tr); - test_reduce_type(ts, tr); - test_reduce_type(ts, tr); - test_reduce_type(ts, tr); - test_reduce_type(ts, tr); - test_reduce_type(ts, tr); +template +void test_reduce_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { 0,41 }; + T expected = { 42 }; + T init = { 1 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(42, 1); + T expected = { 42 }; + T init = { 0 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init); + TEST_EQUAL(tr, expected, res); + } + { //test main loop and epilogue on range + vector_aligned_t ivect(150, 1); + T expected = { 42 }; + T init = { 0 }; + T res = reduce(ivect.data() + 8u, ivect.data() + ivect.size() - 100u, init); + TEST_EQUAL(tr, expected, res); + } + ReduceFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} - test_reducebinop_type(ts, tr); - test_reducebinop_type(ts, tr); - //TR Why no operator * for above types at least for uint64_t - //and int64_t //FIXME - //test_reducebinop_type(ts, tr); - //test_reducebinop_type(ts, tr); - //test_reducebinop_type(ts, tr); - //test_reducebinop_type(ts, tr); - //test_reducebinop_type(ts, tr); - //test_reducebinop_type(ts, tr); - //test_reducebinop_type(ts, tr); - //test_reducebinop_type(ts, tr); +template +void test_reducebinop_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto opPlus = BinaryOpPlus(); + { //test prologue + vector_t ivect = { 1,42 }; + T expected = { 43 }; + T init = { 0 }; + T neutral = { 0 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init, neutral, opPlus); + TEST_EQUAL(tr, expected, res); } + { //test main 
loop and epilogue on unaligned vector + vector_t ivect(125, 1); + T expected = { 125 }; + T init = { 0 }; + T neutral = { 0 }; + T res = reduce(ivect.data(), ivect.data() + ivect.size(), init, neutral, opPlus); + TEST_EQUAL(tr, expected, res); + } + ReduceBinaryFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_reduce(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("reduce"); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + /* test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr); + test_reduce_type(ts, tr);*/ + + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + /*test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr); + test_reducebinop_type(ts, tr);*/ -} // namespace SIMDPP_ARCH_NAMESPACE +} +} \ No newline at end of file diff --git a/test/insn/replace.cc b/test/insn/replace.cc index 7c3a2ca7..0f75e969 100644 --- a/test/insn/replace.cc +++ b/test/insn/replace.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -12,52 +13,77 @@ #include #include #include +#include namespace SIMDPP_ARCH_NAMESPACE { - template - void test_replace_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect = { (T)42,(T)42 }; - vector_t expected = { (T)0,(T)0 }; - - replace(ivect.data(),ivect.data()+ivect.size(),(T)42,(T)0 ); - - for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ivect[i], expected[i]); - } - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50); - std::iota(begin(ivect),end(ivect),(T)0); - vector_aligned_t expected(50); - std::copy(begin(ivect),end(ivect),begin(expected)); - expected[39]=42; - replace(ivect.data(),ivect.data()+ivect.size(),(T)39,(T)42 ); - for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - TEST_EQUAL(tr, expected[i], ivect[i]); - } - } - - void test_replace(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("replace"); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - test_replace_type(ts, tr); - } +template +struct ReplaceFuzzingTest +{ + ReplaceFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generatorFive(5),m_generatorTen(10) {} + void operator()(TestReporter& tr) + { + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generatorFive)); + auto expected(DataGeneratorAligned>(size, m_generatorTen)); + simdpp::replace(input.data(),input.data()+input.size(),(T)5,(T)10); + TEST_EQUAL_COLLECTIONS(tr, input, expected); + } + {//non aligned 
input/ouput + auto input(DataGenerator>(size, m_generatorFive)); + auto expected(DataGenerator>(size, m_generatorTen)); + simdpp::replace(input.data(), input.data() + input.size(),(T)5, (T)10); + TEST_EQUAL_COLLECTIONS(tr, input, expected); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generatorFive; + GeneratorConstant m_generatorTen; +}; + +template + void test_replace_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t expected = { (T)0,(T)0 }; + + replace(ivect.data(),ivect.data()+ivect.size(),(T)42,(T)0 ); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)0); + vector_aligned_t expected(50); + std::copy(begin(ivect),end(ivect),begin(expected)); + expected[39]=42; + replace(ivect.data(),ivect.data()+ivect.size(),(T)39,(T)42 ); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + } + ReplaceFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_replace(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("replace"); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); + test_replace_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/replace_if.cc b/test/insn/replace_if.cc index 35c7c04f..ded3a280 100644 --- a/test/insn/replace_if.cc +++ b/test/insn/replace_if.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at @@ -11,70 +12,97 @@ #include #include #include -#include +#include namespace SIMDPP_ARCH_NAMESPACE { - template - struct UnaryPredicateEqualValue - { - public: - UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} - using simd_mask_T = typename simdpp::typetraits::simd_mask_type; - using simd_type_T = typename simdpp::typetraits::simd_type; +template + struct UnaryPredicateEqualValue +{ +public: + UnaryPredicateEqualValue(T val) :m_val(val), m_val_simd(simdpp::splat(val)) {} + using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; + using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + bool operator()(T a) const { return a == m_val; } + simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } - T m_val; - simd_type_T m_val_simd; - }; - - template - void test_replace_if_type(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; + T m_val; + simd_type_T m_val_simd; +}; + +template +struct ReplaceIfFuzzingTest +{ + ReplaceIfFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generatorFive(5), m_generatorTen(10) {} + void operator()(TestReporter& tr) + { + const auto pred = UnaryPredicateEqualValue((T)5); + for (auto size : m_sizes) + { + {//aligned input/ouput + auto input(DataGeneratorAligned>(size, m_generatorFive)); + auto expected(DataGeneratorAligned>(size, m_generatorTen)); + simdpp::replace_if(input.data(), input.data() + input.size(), pred, (T)10); + TEST_EQUAL_COLLECTIONS(tr, input, expected); + } + {//non aligned input/ouput + auto input(DataGenerator>(size, m_generatorFive)); + auto expected(DataGenerator>(size, m_generatorTen)); + simdpp::replace_if(input.data(), input.data() + 
input.size(), pred, (T)10); + TEST_EQUAL_COLLECTIONS(tr, input, expected); + } + } + } + std::vector m_sizes; + GeneratorConstant m_generatorFive; + GeneratorConstant m_generatorTen; +}; + + +template + void test_replace_if_type(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; - { //test prologue - vector_t ivect = { (T)42,(T)42 }; - vector_t expected = { (T)0,(T)0 }; - const auto pred = UnaryPredicateEqualValue((T)42); - replace_if(ivect.data(),ivect.data()+ivect.size(),pred,(T)0 ); - - for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, ivect[i], expected[i]); - } - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect(50); - std::iota(begin(ivect),end(ivect),(T)0); - vector_aligned_t expected(50); - std::copy(begin(ivect),end(ivect),begin(expected)); - expected[39]=42; - const auto pred = UnaryPredicateEqualValue((T)39); - replace_if(ivect.data(),ivect.data()+ivect.size(),pred,(T)42 ); - for (auto i = 0; i < expected.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - TEST_EQUAL(tr, expected[i], ivect[i]); - } - } + { //test prologue + vector_t ivect = { (T)42,(T)42 }; + vector_t expected = { (T)0,(T)0 }; + const auto pred = UnaryPredicateEqualValue((T)42); + replace_if(ivect.data(),ivect.data()+ivect.size(),pred,(T)0 ); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)0); + vector_aligned_t expected(50); + std::copy(begin(ivect),end(ivect),begin(expected)); + expected[39]=42; + const auto pred = UnaryPredicateEqualValue((T)39); + replace_if(ivect.data(),ivect.data()+ivect.size(),pred,(T)42 ); + TEST_EQUAL_COLLECTIONS(tr, ivect, expected); + } + ReplaceIfFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} - void 
test_replace_if(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("replace_if"); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - test_replace_if_type(ts, tr); - } +void test_replace_if(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("replace_if"); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); + test_replace_if_type(ts, tr); +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/transform.cc b/test/insn/transform.cc index 0833d675..8f4b5f6a 100644 --- a/test/insn/transform.cc +++ b/test/insn/transform.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas +Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -9,162 +10,214 @@ Distributed under the Boost Software License, Version 1.0. 
#include "../utils/test_results.h" #include #include +#include namespace SIMDPP_ARCH_NAMESPACE { - template< typename T> - struct UnaryOpAddValue - { - T m_val; - public: - UnaryOpAddValue(T val) :m_val(val) {} - SIMDPP_INL T operator()(T const &a) const noexcept - { - return m_val + a; - } - template - SIMDPP_INL U operator()(U const &a) const noexcept - { - return m_val + a; - } - }; - template< typename T> - struct BinaryOpAdd +template< typename T> +struct UnaryOpAddValue +{ + T m_val; +public: + UnaryOpAddValue(T val) :m_val(val) {} + SIMDPP_INL T operator()(T const &a) const noexcept { - public: - BinaryOpAdd() {} - SIMDPP_INL T operator()(T const &a0, T const &a1) const noexcept - { - return a0 + a1; - } - - template - SIMDPP_INL U operator()(U const &a0, U const &a1) const noexcept - { - using namespace simdpp; - return a0 + a1; - } - }; + return m_val + a; + } + template + SIMDPP_INL U operator()(U const &a) const noexcept + { + return m_val + a; + } +}; + +template< typename T> +struct BinaryOpAdd +{ +public: + BinaryOpAdd() {} + SIMDPP_INL T operator()(T const &a0, T const &a1) const noexcept + { + return a0 + a1; + } - template - void test_transform_type_unary(TestResultsSet& ts, TestReporter& tr) + template + SIMDPP_INL U operator()(U const &a0, U const &a1) const noexcept { using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; + return a0 + a1; + } +}; + + +template +struct TransformUnaryFuzzingTest +{ + TransformUnaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { auto opPlusOne = UnaryOpAddValue(1); - { //test prologue - vector_t ivect = { 0,1 }; - vector_t ovect(2); - vector_t expected = { 1,2 }; - - transform(ivect.data(), ivect.data() + ivect.size(), ovect.data(), opPlusOne); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ovect[i]); - } - } - { //test main 
loop and epilogue on aligned vector - vector_aligned_t ivect(150, 0); - vector_aligned_t ovect(150); - vector_t expected(150, 1); - - transform(ivect.data(), ivect.data() + ivect.size(), ovect.data(), opPlusOne); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ovect[i]); + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + std::vector::alignment>> expected(size); + std::vector::alignment>> output(size); + std::transform(cbegin(input), cend(input), begin(expected), opPlusOne); + simdpp::transform(input.data(), input.data() + input.size(), output.data(), opPlusOne); + TEST_EQUAL_COLLECTIONS(tr, output, expected); } - } - { //test main loop and epilogue on range - vector_aligned_t ivect(150, 0); - vector_aligned_t ovect(150); - vector_t expected(150, 1); - - transform(ivect.data() + 10u, ivect.data() + ivect.size() - 10u, ovect.data() + 10u, opPlusOne); - for (auto i = 10; i < ovect.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ovect[i]); + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + std::vector expected(size); + std::vector output(size); + std::transform(cbegin(input), cend(input), begin(expected), opPlusOne); + simdpp::transform(input.data(), input.data() + input.size(), output.data(), opPlusOne); + TEST_EQUAL_COLLECTIONS(tr, output, expected); } } - - - } - template - void test_transform_type_binary(TestResultsSet& ts, TestReporter& tr) + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +struct TransformBinaryFuzzingTest +{ + TransformBinaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) { - using namespace simdpp; - using vector_t = std::vector; - using vector_u = std::vector; - using vector_aligned_t = std::vector::alignment>>; - using vector_aligned_u = 
std::vector::alignment>>; auto opPlus = BinaryOpAdd(); - { //test prologue - vector_t ivect1 = { 0,1 }; - vector_u ivect2 = { 1,2 }; - vector_t ovect(2); - vector_t expected = { 1,3 }; - - transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ovect[i]); - } - } - { //test main loop and epilogue on aligned vector - vector_aligned_t ivect1(150, 0); - vector_aligned_t ivect2(150, 1); - vector_aligned_t ovect(150); - vector_t expected(150, 1); - - transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ovect[i]); + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input1(DataGeneratorAligned>(size, m_generator)); + const auto input2(DataGeneratorAligned>(size, m_generator)); + std::vector::alignment>> expected(size); + std::vector::alignment>> output(size); + std::transform(cbegin(input1), cend(input1), cbegin(input2), begin(expected), opPlus); + simdpp::transform(input1.data(), input1.data() + input1.size(), input2.data(), output.data(), opPlus); + TEST_EQUAL_COLLECTIONS(tr, output, expected); } - } - { //test main loop and epilogue on range - vector_aligned_t ivect1(150, 0); - vector_aligned_t ivect2(150, 1); - vector_aligned_t ovect(150); - vector_t expected(150, 1); - - transform(ivect1.data() + 10u, ivect1.data() + ivect1.size() - 10u, ivect2.data() + 10u, ovect.data() + 10u, opPlus); - for (auto i = 10u; i < ovect.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS - { - TEST_EQUAL(tr, expected[i], ovect[i]); + {//non aligned input/ouput + const auto input1(DataGenerator>(size, m_generator)); + const auto input2(DataGenerator>(size, m_generator)); + std::vector expected(size); + std::vector output(size); + std::transform(cbegin(input1), cend(input1), 
cbegin(input2), begin(expected), opPlus); + simdpp::transform(input1.data(), input1.data() + input1.size(), input2.data(), output.data(), opPlus); + TEST_EQUAL_COLLECTIONS(tr, output, expected); } } } + std::vector m_sizes; + GeneratorRandom m_generator; +}; +template +void test_transform_type_unary(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + auto opPlusOne = UnaryOpAddValue(1); + { //test prologue + vector_t ivect = { 0,1 }; + vector_t ovect(2); + vector_t expected = { 1,2 }; + + transform(ivect.data(), ivect.data() + ivect.size(), ovect.data(), opPlusOne); + TEST_EQUAL_COLLECTIONS(tr, ovect, expected); + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect(150, 0); + vector_aligned_t ovect(150); + vector_t expected(150, 1); - void test_transform(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("transform"); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - test_transform_type_unary(ts, tr); - - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - test_transform_type_binary(ts, tr); - + transform(ivect.data(), ivect.data() + ivect.size(), ovect.data(), opPlusOne); + TEST_EQUAL_COLLECTIONS(tr, ovect, expected); + } + TransformUnaryFuzzingTest fuzzing({1,3,5,8,21,55,89,144});//0 generate null ptr inputs/ouput + 
fuzzing(tr); + + +} +template +void test_transform_type_binary(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_u = std::vector; + using vector_aligned_t = std::vector::alignment>>; + using vector_aligned_u = std::vector::alignment>>; + auto opPlus = BinaryOpAdd(); + { //test prologue + vector_t ivect1 = { 0,1 }; + vector_u ivect2 = { 1,2 }; + vector_t ovect(2); + vector_t expected = { 1,3 }; + + transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + { //test main loop and epilogue on aligned vector + vector_aligned_t ivect1(150, 0); + vector_aligned_t ivect2(150, 1); + vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); + for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } + } + { //test main loop and epilogue on range + vector_aligned_t ivect1(150, 0); + vector_aligned_t ivect2(150, 1); + vector_aligned_t ovect(150); + vector_t expected(150, 1); + + transform(ivect1.data() + 10u, ivect1.data() + ivect1.size() - 10u, ivect2.data() + 10u, ovect.data() + 10u, opPlus); + for (auto i = 10u; i < ovect.size() - 10u; ++i) //TODO make TEST_EQUAL_COLECTIONS + { + TEST_EQUAL(tr, expected[i], ovect[i]); + } } + TransformBinaryFuzzingTest fuzzing({1,3,5,8,21,55,89,144 }); //0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_transform(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("transform"); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + 
test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + test_transform_type_unary(ts, tr); + + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + test_transform_type_binary(ts, tr); + +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/transform_reduce.cc b/test/insn/transform_reduce.cc index 0b52dc70..31529256 100644 --- a/test/insn/transform_reduce.cc +++ b/test/insn/transform_reduce.cc @@ -1,4 +1,5 @@ /* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at @@ -10,98 +11,168 @@ #include #include #include +#include namespace SIMDPP_ARCH_NAMESPACE { - using namespace simdpp; +using namespace simdpp; - template - struct UnaryPredicateSquare - { - using simd_type_T = typename typetraits::simd_type; - T operator()(T a) {return a*a;} - simd_type_T operator()(const simd_type_T& a) {return a*a;} - }; +template + struct UnaryPredicateSquare +{ + using simd_type_T = typename simd_traits::simd_type; + T operator()(T a) {return a*a;} + simd_type_T operator()(const simd_type_T& a) {return a*a;} +}; - template - struct BinaryPredicatePlus - { - using simd_type_T = typename typetraits::simd_type; - T operator()(T a0,T a1) {return a0 + a1;} - simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return a0+a1;} - }; +template + struct BinaryPredicatePlus +{ + using simd_type_T = typename simd_traits::simd_type; + T operator()(T a0,T a1) {return a0 + a1;} + simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return a0+a1;} +}; - 
template - struct BinaryPredicateMul - { - using simd_type_T = typename typetraits::simd_type; - T operator()(T a0,T a1) {return a0 * a1;} - simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return a0*a1;} - }; - - template - void test_transform_reduce_type_unary(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - {// - vector_aligned_t ivect={(T)0,T(1)}; - auto res=transform_reduce(ivect.data(),ivect.data()+ivect.size(),(T)0,BinaryPredicatePlus(),UnaryPredicateSquare()); - auto expected=std::inner_product( ivect.data(),ivect.data()+ivect.size(), ivect.data(), T(0)); - TEST_EQUAL(tr, expected,res); - } - {// - vector_aligned_t ivect(50); - std::iota(begin(ivect),end(ivect),(T)1); - auto res=transform_reduce(ivect.data(),ivect.data()+ivect.size(),(T)0,BinaryPredicatePlus(),UnaryPredicateSquare()); - auto expected=std::inner_product( ivect.data(),ivect.data()+ivect.size(), ivect.data(), T(0)); - TEST_EQUAL(tr, expected,res); - } - } - template - void test_transform_reduce_type_binary(TestResultsSet& ts, TestReporter& tr) - { - using namespace simdpp; - using vector_t = std::vector; - using vector_aligned_t = std::vector::alignment>>; - using vector_aligned_u = std::vector::alignment>>; - { - vector_aligned_t ivect(50); - std::iota(begin(ivect),end(ivect),(T)1); - vector_aligned_u ivect2(50); - std::iota(begin(ivect2),end(ivect2),(T)1); - auto res = transform_reduce( ivect.data(),ivect.data()+ivect.size(),ivect2.data(),T(0),BinaryPredicateMul(),BinaryPredicatePlus()); - auto expected = std::inner_product( ivect.data(), ivect.data()+ivect.size(), ivect2.data(), T(0)); - TEST_EQUAL(tr, expected,res); - } - } +template + struct BinaryPredicateMul +{ + using simd_type_T = typename simd_traits::simd_type; + T operator()(T a0,T a1) {return a0 * a1;} + simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return a0*a1;} +}; - void 
test_transform_reduce(TestResults& res, TestReporter& tr) - { - using namespace simdpp; - TestResultsSet& ts = res.new_results_set("transform_reduce"); - test_transform_reduce_type_unary(ts, tr); - test_transform_reduce_type_unary(ts, tr); - // test_transform_reduce_type_unary(ts, tr); //FIXME - // test_transform_reduce_type_unary(ts, tr); //FIXME - // test_transform_reduce_type_unary(ts, tr); //FIXME - // test_transform_reduce_type_unary(ts, tr); //FIXME - // test_transform_reduce_type_unary(ts, tr); //FIXME - // test_transform_reduce_type_unary(ts, tr); //FIXME - // test_transform_reduce_type_unary(ts, tr); //FIXME - // test_transform_reduce_type_unary(ts, tr); //FIXME +//from https://stackoverflow.com/questions/17333/what-is-the-most-effective-way-for-float-and-double-comparison +template +bool approximatelyEqual(T a, T b, T epsilon) +{ + return fabs(a - b) <= ((fabs(a) < fabs(b) ? fabs(b) : fabs(a)) * epsilon); +} - test_transform_reduce_type_binary(ts, tr); - test_transform_reduce_type_binary(ts, tr); - // test_transform_reduce_type_binary(ts,tr); //FIXME - // test_transform_reduce_type_binary(ts, tr); //FIXME - // test_transform_reduce_type_binary(ts, tr); //FIXME - // test_transform_reduce_type_binary(ts, tr); //FIXME - // test_transform_reduce_type_binary(ts, tr); //FIXME - // test_transform_reduce_type_binary(ts, tr); //FIXME - // test_transform_reduce_type_binary(ts, tr); //FIXME - // test_transform_reduce_type_binary(ts, tr); //FIXME +template +struct TransformReduceUnaryFuzzingTest +{ + TransformReduceUnaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + T init = (T)0; - } + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::inner_product(input.data(), input.data() + input.size(), input.data(),init); + auto res_simd = simdpp::transform_reduce(input.data(), input.data() + input.size(),init, 
BinaryPredicatePlus(), UnaryPredicateSquare()); + TEST_EQUAL(tr, approximatelyEqual(res_std,res_simd, 10 * std::numeric_limits::epsilon()),true); + } + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + auto res_std = std::inner_product(input.data(), input.data() + input.size(), input.data(), init); + auto res_simd = simdpp::transform_reduce(input.data(), input.data() + input.size(), init, BinaryPredicatePlus(), UnaryPredicateSquare()); + TEST_EQUAL(tr, approximatelyEqual(res_std,res_simd, 10 * std::numeric_limits::epsilon()), true); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template +struct TransformReduceBinaryFuzzingTest +{ + TransformReduceBinaryFuzzingTest(std::initializer_list sizes = {}) :m_sizes(sizes), m_generator() {} + void operator()(TestReporter& tr) + { + T init = (T)1; + + for (auto size : m_sizes) + { + {//aligned input/ouput + const auto input(DataGeneratorAligned>(size, m_generator)); + const auto input2(DataGeneratorAligned>(size, m_generator)); + auto res_std = std::inner_product(input.data(), input.data() + input.size(), input2.data(), init); + auto res_simd = simdpp::transform_reduce(input.data(), input.data() + input.size(), input2.data(), init, BinaryPredicatePlus(), BinaryPredicateMul()); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + {//non aligned input/ouput + const auto input(DataGenerator>(size, m_generator)); + const auto input2(DataGenerator>(size, m_generator)); + auto res_std = std::inner_product(input.data(), input.data() + input.size(), input2.data(), init); + auto res_simd = simdpp::transform_reduce(input.data(), input.data() + input.size(), input2.data(), init, BinaryPredicatePlus(), BinaryPredicateMul()); + TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); + } + } + } + std::vector m_sizes; + GeneratorRandom m_generator; +}; + +template + void 
test_transform_reduce_type_unary(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + {// + vector_aligned_t ivect={(T)0,T(1)}; + auto res=transform_reduce(ivect.data(),ivect.data()+ivect.size(),(T)0,BinaryPredicatePlus(),UnaryPredicateSquare()); + auto expected=std::inner_product( ivect.data(),ivect.data()+ivect.size(), ivect.data(), T(0)); + TEST_EQUAL(tr, expected,res); + } + {// + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)1); + auto res=transform_reduce(ivect.data(),ivect.data()+ivect.size(),(T)0,BinaryPredicatePlus(),UnaryPredicateSquare()); + auto expected=std::inner_product( ivect.data(),ivect.data()+ivect.size(), ivect.data(), T(0)); + TEST_EQUAL(tr, expected,res); + } + TransformReduceUnaryFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} +template + void test_transform_reduce_type_binary(TestResultsSet& ts, TestReporter& tr) +{ + using namespace simdpp; + using vector_t = std::vector; + using vector_aligned_t = std::vector::alignment>>; + using vector_aligned_u = std::vector::alignment>>; + { + vector_aligned_t ivect(50); + std::iota(begin(ivect),end(ivect),(T)1); + vector_aligned_u ivect2(50); + std::iota(begin(ivect2),end(ivect2),(T)1); + auto res = transform_reduce( ivect.data(),ivect.data()+ivect.size(),ivect2.data(),T(1), BinaryPredicatePlus(), BinaryPredicateMul()); + auto expected = std::inner_product( ivect.data(), ivect.data()+ivect.size(), ivect2.data(), T(1)); + TEST_EQUAL(tr, expected,res); + } + TransformReduceBinaryFuzzingTest fuzzing({ 1,3,5,8,21,55,89,144 });//0 generate null ptr inputs/ouput + fuzzing(tr); +} + +void test_transform_reduce(TestResults& res, TestReporter& tr) +{ + using namespace simdpp; + TestResultsSet& ts = res.new_results_set("transform_reduce"); + test_transform_reduce_type_unary(ts, tr); + test_transform_reduce_type_unary(ts, tr); + // 
test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + // test_transform_reduce_type_unary(ts, tr); //FIXME + + test_transform_reduce_type_binary(ts, tr); + test_transform_reduce_type_binary(ts, tr); + // test_transform_reduce_type_binary(ts,tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + // test_transform_reduce_type_binary(ts, tr); //FIXME + +} } // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/utils/test_helpers.h b/test/utils/test_helpers.h index 7078ce9a..1bd15c85 100644 --- a/test/utils/test_helpers.h +++ b/test/utils/test_helpers.h @@ -1,5 +1,5 @@ /* Copyright (C) 2012 Povilas Kanapickas - + Copyright (C) 2018 Thomas Retornaz Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -16,6 +16,9 @@ #include "test_reporter.h" #include #include +#include +#include +#include inline void set_round_to_zero() @@ -90,7 +93,7 @@ class TestData { TestData& operator=(const TestData& other) { data_ = other.data_; - return (*this); + return *this; } template @@ -116,7 +119,6 @@ class TestData { std::vector> data_; }; - /* A bunch of overloads that wrap the TestSuite::push() method. The push() method accepts a type enum plus a pointer; the wrapper overloads determine the type enum from the type of the supplied argument. 
@@ -283,6 +285,88 @@ void print_vector_numeric(std::ostream& out, const V& v) print_vector_numeric(out, GetElementType::value, v.length, block.data()); } + +//TR to be moved elsewhere ? + +template +struct GeneratorConstant +{ + GeneratorConstant(T constant) { m_constant = constant; } + T operator()() { return m_constant; } + T m_constant; +}; + +template +struct GeneratorIota +{ + GeneratorIota(T start) { m_current = start; } + T operator()() { return ++m_current; } + T m_current; +}; + +template +struct GeneratorRandom; + +template <> +struct GeneratorRandom +{ + GeneratorRandom() :m_inner_random_generator(std::random_device()()), m_dis(0, UINT8_MAX) {} + uint8_t operator()() { return m_dis(m_inner_random_generator); } + + std::mt19937 m_inner_random_generator; + std::uniform_int_distribution m_dis; +}; + +template<> +struct GeneratorRandom +{ + GeneratorRandom() :m_inner_random_generator(std::random_device()()), m_dis(INT8_MIN, INT8_MAX) {} + int8_t operator()() { return m_dis(m_inner_random_generator); } + + std::mt19937 m_inner_random_generator; + std::uniform_int_distribution m_dis; +}; + +template +struct GeneratorRandom::value>::type> +{ + GeneratorRandom() :m_inner_random_generator(std::random_device()()), m_dis() {} + T operator()() + { + return m_dis(m_inner_random_generator); + } + std::mt19937 m_inner_random_generator; + std::uniform_int_distribution m_dis; +}; + +template +struct GeneratorRandom::value>::type> +{ + GeneratorRandom() :m_inner_random_generator(std::random_device()()), m_dis() {} + T operator()() + { + return m_dis(m_inner_random_generator); + } + std::mt19937 m_inner_random_generator; + std::uniform_real_distribution m_dis; +}; + +template +decltype(auto) DataGeneratorAligned(std::size_t size, Generator gen) +{ + std::vector::alignment>> vect(size); + std::generate(vect.begin(), vect.end(), gen); + return vect; +} + +template +decltype(auto) DataGenerator(std::size_t size, Generator gen) +{ + std::vector vect(size); + 
std::generate(vect.begin(), vect.end(), gen); + return vect; +} + } // namespace SIMDPP_ARCH_NAMESPACE // we are supposed to call this from within the test function which is in @@ -688,6 +772,43 @@ void test_cmp_equal(TestReporter& tr, const T1& a1, const T2& a2, line, file); } + +template +void test_cmp_equal_collections_impl(TestReporter& tr, + const Container1& a1, const Container2& a2, + bool expected_equal, unsigned line, const char* file) +{ + bool sucess_size= (a1.size()== a2.size()); + if (!sucess_size) { + tr.add_result(false); + print_separator(tr.out()); + print_file_info(tr.out(), file, line); + tr.out() << " Container Size not equal:\n"; + tr.out() << " Container1 Size is:"< +void test_cmp_equal_collections(TestReporter& tr, const Container1& a1, const Container2& a2, + bool expected_equal, unsigned line, const char* file) +{ + static_assert(std::is_same::value, //TR to be relaxed for comparable types? + "Invalid types for comparison"); + test_cmp_equal_collections_impl(tr, a1, a2, expected_equal,line, file); +} + #define TEST_EQUAL(TR, V1, V2) \ do { test_cmp_equal(TR, V1, V2, true, __LINE__, __FILE__); } while(0) @@ -700,4 +821,8 @@ void test_cmp_equal(TestReporter& tr, const T1& a1, const T2& a2, #define TEST_NOT_EQUAL_MEMORY(TR, E1, E2, COUNT) \ do { test_cmp_memory((TR), (E1), (E2), (COUNT), false, __LINE__, __FILE__); } while(0) +#define TEST_EQUAL_COLLECTIONS(TR, C1, C2) \ + do { test_cmp_equal_collections(TR, C1, C2, true, __LINE__, __FILE__); } while(0) + + #endif From d8b2eda579b9f61a7912e90c0665d2f6b8062fde Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Mon, 9 Apr 2018 19:28:16 +0200 Subject: [PATCH 19/23] issue #107 gcc and c++11 only compil fix --- test/insn/all_of.cc | 9 +++++---- test/insn/any_of.cc | 8 ++++---- test/insn/copy.cc | 6 +++--- test/insn/count.cc | 6 +++--- test/insn/count_if.cc | 6 +++--- test/insn/equal.cc | 4 ++-- test/insn/find.cc | 6 +++--- test/insn/find_if.cc | 6 +++--- test/insn/find_if_not.cc | 6 +++--- 
test/insn/lexicographical_compare.cc | 6 ++---- test/insn/max.cc | 2 +- test/insn/max_element.cc | 8 ++++---- test/insn/min.cc | 2 +- test/insn/min_element.cc | 8 ++++---- test/insn/none_of.cc | 8 ++++---- test/insn/reduce.cc | 10 +++++----- test/insn/transform.cc | 8 ++++---- test/utils/test_helpers.h | 12 +++++++----- 18 files changed, 61 insertions(+), 60 deletions(-) diff --git a/test/insn/all_of.cc b/test/insn/all_of.cc index 0d639346..262a4b71 100644 --- a/test/insn/all_of.cc +++ b/test/insn/all_of.cc @@ -10,6 +10,7 @@ #include "../utils/test_results.h" #include #include +#include #include //algorithm #include @@ -45,26 +46,26 @@ struct AllOffFuzzingTest { {//aligned input/ouput predicate match const auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std=std::all_of(cbegin(input), cend(input), predEqualTen); + auto res_std=std::all_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd=simdpp::all_of(input.data(), input.data() + input.size(),predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } {//non aligned input/ouput predicate match const auto input(DataGenerator>(size, m_generator)); - auto res_std = std::all_of(cbegin(input), cend(input), predEqualTen); + auto res_std=std::all_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } {//aligned input/ouput predicate fail const auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::all_of(cbegin(input), cend(input), predEqualFive); + auto res_std=std::all_of(input.cbegin(), input.cend(), predEqualFive); auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualFive); TEST_EQUAL(tr, res_std, res_simd); } {//non aligned input/ouput predicate fail const auto input(DataGenerator>(size, m_generator)); - auto res_std = std::all_of(cbegin(input), cend(input), predEqualFive); + auto res_std=std::all_of(input.cbegin(), input.cend(), 
predEqualFive); auto res_simd = simdpp::all_of(input.data(), input.data() + input.size(), predEqualFive); TEST_EQUAL(tr, res_std, res_simd); } diff --git a/test/insn/any_of.cc b/test/insn/any_of.cc index dae18923..051804fc 100644 --- a/test/insn/any_of.cc +++ b/test/insn/any_of.cc @@ -46,27 +46,27 @@ struct AnyOffFuzzingTest {//aligned input/ouput predicate match auto input(DataGeneratorAligned>(size, m_generator)); input[(size_t)(rand() % input.size())]=10; - auto res_std = std::any_of(cbegin(input), cend(input), predEqualTen); + auto res_std = std::any_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } {//non aligned input/ouput predicate match auto input(DataGenerator>(size, m_generator)); input[(size_t)(rand() % input.size())] = 10; - auto res_std = std::any_of(cbegin(input), cend(input), predEqualTen); + auto res_std = std::any_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } {//aligned input/ouput predicate fail auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::any_of(cbegin(input), cend(input), predEqualTen); + auto res_std = std::any_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } {//non aligned input/ouput predicate fail auto input(DataGenerator>(size, m_generator)); - auto res_std = std::any_of(cbegin(input), cend(input), predEqualTen); + auto res_std = std::any_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::any_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } diff --git a/test/insn/copy.cc b/test/insn/copy.cc index 88970dc4..fb032f96 100644 --- a/test/insn/copy.cc +++ b/test/insn/copy.cc 
@@ -30,7 +30,7 @@ struct CopyFuzzingTest auto input(DataGeneratorAligned>(size, m_generator)); std::vector::alignment>> expected(size); std::vector::alignment>> output(size); - std::copy(cbegin(input),cend(input),begin(expected)); + std::copy(input.cbegin(), input.cend(),begin(expected)); simdpp::copy(input.data(), input.data()+input.size(), output.data()); TEST_EQUAL_COLLECTIONS(tr,output,expected); } @@ -38,7 +38,7 @@ struct CopyFuzzingTest auto input(DataGenerator>(size, m_generator)); std::vector expected(size); std::vector output(size); - std::copy(cbegin(input), cend(input), begin(expected)); + std::copy(input.cbegin(), input.cend(), begin(expected)); simdpp::copy(input.data(), input.data() + input.size(), output.data()); TEST_EQUAL_COLLECTIONS(tr, output, expected); } @@ -106,4 +106,4 @@ void test_copy(TestResults& res, TestReporter& tr) test_copy_type(ts, tr); } -} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/count.cc b/test/insn/count.cc index 7cba5304..124802ae 100644 --- a/test/insn/count.cc +++ b/test/insn/count.cc @@ -28,14 +28,14 @@ struct CountFuzzingTest {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); input[(input.size()-1)%2]=(T)0; - auto res_std=std::count(cbegin(input), cend(input),(T)42); + auto res_std=std::count(input.cbegin(), input.cend(),(T)42); auto res_simd=simdpp::count(input.data(), input.data() + input.size(), (T)42); TEST_EQUAL(tr, res_std, res_simd); } {//unaligned input/ouput auto input(DataGenerator>(size, m_generator)); input[(input.size()-1) % 2] = (T)0; - auto res_std = std::count(cbegin(input), cend(input), (T)42); + auto res_std = std::count(input.cbegin(), input.cend(), (T)42); auto res_simd = simdpp::count(input.data(), input.data() + input.size(), (T)42); TEST_EQUAL(tr, res_std, res_simd); @@ -86,4 +86,4 @@ void test_count(TestResults& res, TestReporter& tr) test_count_type(ts, tr); } -} // namespace 
SIMDPP_ARCH_NAMESPACE \ No newline at end of file +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/count_if.cc b/test/insn/count_if.cc index 4b5f8437..4cd58dc3 100644 --- a/test/insn/count_if.cc +++ b/test/insn/count_if.cc @@ -44,14 +44,14 @@ struct CountIfFuzzingTest {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); input[(input.size() - 1) % 2] = (T)0; - auto res_std = std::count_if(cbegin(input), cend(input),pred); + auto res_std = std::count_if(input.cbegin(), input.cend(),pred); auto res_simd = simdpp::count_if(input.data(), input.data() + input.size(), pred); TEST_EQUAL(tr, res_std, res_simd); } {//unaligned input/ouput auto input(DataGenerator>(size, m_generator)); input[(input.size() - 1) % 2] = (T)0; - auto res_std = std::count_if(cbegin(input), cend(input), pred); + auto res_std = std::count_if(input.cbegin(), input.cend(), pred); auto res_simd = simdpp::count_if(input.data(), input.data() + input.size(), pred); TEST_EQUAL(tr, res_std, res_simd); @@ -103,4 +103,4 @@ void test_count_if(TestResults& res, TestReporter& tr) test_count_if_type(ts, tr); } -} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/equal.cc b/test/insn/equal.cc index eb440c32..42b74834 100644 --- a/test/insn/equal.cc +++ b/test/insn/equal.cc @@ -27,14 +27,14 @@ struct EqualFuzzingTest {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); auto input2(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::equal(cbegin(input), cend(input), cbegin(input2)); + auto res_std = std::equal(input.cbegin(), input.cend(), input2.cbegin()); auto res_simd = simdpp::equal(input.data(), input.data() + input.size(), input2.data()); TEST_EQUAL(tr, res_std, res_simd); } {//unaligned input/ouput auto input(DataGenerator>(size, m_generator)); auto input2(DataGenerator>(size, m_generator)); - auto res_std = std::equal(cbegin(input), cend(input), cbegin(input2)); + 
auto res_std = std::equal(input.cbegin(), input.cend(),input2.cbegin()); auto res_simd = simdpp::equal(input.data(), input.data() + input.size(), input2.data()); TEST_EQUAL(tr, res_std, res_simd); diff --git a/test/insn/find.cc b/test/insn/find.cc index 7dc8c7ad..348dcfaf 100644 --- a/test/insn/find.cc +++ b/test/insn/find.cc @@ -27,13 +27,13 @@ struct FindFuzzingTest const auto val = ((size - 1) / 2)+1; {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::find(cbegin(input), cend(input), val); + auto res_std = std::find(input.cbegin(), input.cend(), val); auto res_simd = simdpp::find(input.data(), input.data() + input.size(),val); TEST_EQUAL(tr, *res_std, *res_simd); } {//unaligned input/ouput auto input(DataGenerator>(size, m_generator)); - auto res_std = std::find(cbegin(input), cend(input), val); + auto res_std = std::find(input.cbegin(), input.cend(), val); auto res_simd = simdpp::find(input.data(), input.data() + input.size(), val); TEST_EQUAL(tr, *res_std, *res_simd); } @@ -89,4 +89,4 @@ void test_find(TestResults& res, TestReporter& tr) test_find_type(ts, tr); } -} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/find_if.cc b/test/insn/find_if.cc index 9d090309..5d6e269f 100644 --- a/test/insn/find_if.cc +++ b/test/insn/find_if.cc @@ -42,13 +42,13 @@ struct FindIfFuzzingTest const auto pred = UnaryPredicateSupValue((T)((size - 1) / 2)); {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::find_if(cbegin(input), cend(input), pred); + auto res_std = std::find_if(input.cbegin(), input.cend(),pred); auto res_simd = simdpp::find_if(input.data(), input.data() + input.size(), pred); TEST_EQUAL(tr, *res_std, *res_simd); } {//unaligned input/ouput auto input(DataGenerator>(size, m_generator)); - auto res_std = std::find_if(cbegin(input), cend(input), pred); + auto res_std = std::find_if(input.cbegin(), 
input.cend(), pred); auto res_simd = simdpp::find_if(input.data(), input.data() + input.size(), pred); TEST_EQUAL(tr, *res_std, *res_simd); @@ -108,4 +108,4 @@ void test_find_if(TestResults& res, TestReporter& tr) test_find_if_type(ts, tr); } -} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/find_if_not.cc b/test/insn/find_if_not.cc index 6fe7a088..693b2320 100644 --- a/test/insn/find_if_not.cc +++ b/test/insn/find_if_not.cc @@ -42,13 +42,13 @@ struct FindIfNotFuzzingTest const auto pred = UnaryPredicateInfValue((T)((size - 1) / 2)); {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::find_if_not(cbegin(input), cend(input), pred); + auto res_std = std::find_if_not(input.cbegin(), input.cend(), pred); auto res_simd = simdpp::find_if_not(input.data(), input.data() + input.size(), pred); TEST_EQUAL(tr, *res_std, *res_simd); } {//unaligned input/ouput auto input(DataGenerator>(size, m_generator)); - auto res_std = std::find_if_not(cbegin(input), cend(input), pred); + auto res_std = std::find_if_not(input.cbegin(), input.cend(), pred); auto res_simd = simdpp::find_if_not(input.data(), input.data() + input.size(), pred); TEST_EQUAL(tr, *res_std, *res_simd); @@ -108,4 +108,4 @@ void test_find_if_not(TestResults& res, TestReporter& tr) test_find_if_not_type(ts, tr); } -} // namespace SIMDPP_ARCH_NAMESPACE \ No newline at end of file +} // namespace SIMDPP_ARCH_NAMESPACE diff --git a/test/insn/lexicographical_compare.cc b/test/insn/lexicographical_compare.cc index dd7e76cf..94b6d972 100644 --- a/test/insn/lexicographical_compare.cc +++ b/test/insn/lexicographical_compare.cc @@ -29,18 +29,16 @@ struct LexicographicalCompareFuzzingTest {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator1)); auto input2(DataGeneratorAligned>(size, m_generator2)); - auto res_std = std::lexicographical_compare(cbegin(input), cend(input), 
cbegin(input2), cend(input2)); + auto res_std = std::lexicographical_compare(input.cbegin(), input.cend(), input2.cbegin(), input2.cend()); auto res_simd = simdpp::lexicographical_compare(input.data(), input.data() + input.size(), input2.data(), input2.data() + input2.size() ); TEST_EQUAL(tr, res_std, res_simd); } {//unaligned input/ouput auto input(DataGenerator>(size, m_generator1)); auto input2(DataGenerator>(size, m_generator2)); - auto res_std = std::lexicographical_compare(cbegin(input), cend(input), cbegin(input2), cend(input2)); + auto res_std = std::lexicographical_compare(input.cbegin(), input.cend(), input2.cbegin(), input2.cend()); auto res_simd = simdpp::lexicographical_compare(input.data(), input.data() + input.size(), input2.data(), input2.data() + input2.size()); TEST_EQUAL(tr, res_std, res_simd); - - } } } diff --git a/test/insn/max.cc b/test/insn/max.cc index 29c2c9bd..8b51bdad 100644 --- a/test/insn/max.cc +++ b/test/insn/max.cc @@ -34,7 +34,7 @@ typename Container::value_type ExtractMaxFromContainer(const Container& cont) { typedef typename Container::value_type value_type; value_type current = std::numeric_limits::lowest(); - auto it=cbegin(cont),itend=cend(cont); + auto it=cont.cbegin(),itend=cont.cend(); for (; it != itend; ++it) { current=std::max(current,*it); diff --git a/test/insn/max_element.cc b/test/insn/max_element.cc index 2c4e3c83..684642a1 100644 --- a/test/insn/max_element.cc +++ b/test/insn/max_element.cc @@ -41,26 +41,26 @@ struct MaxElementFuzzingTest { {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::max_element(cbegin(input), cend(input)); + auto res_std = std::max_element(input.cbegin(), input.cend()); auto res_simd = simdpp::max_element(input.data(), input.data() + input.size()); TEST_EQUAL(tr, *res_std,*res_simd); } {//unaligned input/ouput auto input(DataGenerator>(size, m_generator)); - auto res_std = std::max_element(cbegin(input), cend(input)); + auto res_std = 
std::max_element(input.cbegin(), input.cend()); auto res_simd = simdpp::max_element(input.data(), input.data() + input.size()); TEST_EQUAL(tr, *res_std, *res_simd); } {//aligned input/ouput + predicate auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::max_element(cbegin(input), cend(input), cmpOPGreater); + auto res_std = std::max_element(input.cbegin(), input.cend(), cmpOPGreater); auto res_simd = simdpp::max_element(input.data(), input.data() + input.size(), cmpOPGreater); TEST_EQUAL(tr, *res_std, *res_simd); } {//unaligned input/ouput + predicate auto input(DataGenerator>(size, m_generator)); - auto res_std = std::max_element(cbegin(input), cend(input), cmpOPGreater); + auto res_std = std::max_element(input.cbegin(), input.cend(), cmpOPGreater); auto res_simd = simdpp::max_element(input.data(), input.data() + input.size(), cmpOPGreater); TEST_EQUAL(tr, *res_std, *res_simd); } diff --git a/test/insn/min.cc b/test/insn/min.cc index e7fe43b5..f4aec34e 100644 --- a/test/insn/min.cc +++ b/test/insn/min.cc @@ -34,7 +34,7 @@ typename Container::value_type ExtractMinFromContainer(const Container& cont) { typedef typename Container::value_type value_type; value_type current = std::numeric_limits::max(); - auto it = cbegin(cont), itend = cend(cont); + auto it = cont.cbegin(), itend = cont.cend(); for (; it != itend; ++it) { current = std::min(current, *it); diff --git a/test/insn/min_element.cc b/test/insn/min_element.cc index c7d83843..cbd8d117 100644 --- a/test/insn/min_element.cc +++ b/test/insn/min_element.cc @@ -41,14 +41,14 @@ struct MinElementFuzzingTest {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); std::reverse(std::begin(input), std::end(input)); - auto res_std = std::min_element(cbegin(input), cend(input)); + auto res_std = std::min_element(input.cbegin(), input.cend()); auto res_simd = simdpp::min_element(input.data(), input.data() + input.size()); TEST_EQUAL(tr, *res_std, *res_simd); } {//unaligned 
input/ouput auto input(DataGenerator>(size, m_generator)); std::reverse(std::begin(input), std::end(input)); - auto res_std = std::min_element(cbegin(input), cend(input)); + auto res_std = std::min_element(input.cbegin(), input.cend()); auto res_simd = simdpp::min_element(input.data(), input.data() + input.size()); TEST_EQUAL(tr, *res_std, *res_simd); @@ -56,7 +56,7 @@ struct MinElementFuzzingTest {//aligned input/ouput + predicate auto input(DataGeneratorAligned>(size, m_generator)); std::reverse(std::begin(input), std::end(input)); - auto res_std = std::min_element(cbegin(input), cend(input), cmpOPGreater); + auto res_std = std::min_element(input.cbegin(), input.cend(), cmpOPGreater); auto res_simd = simdpp::min_element(input.data(), input.data() + input.size(), cmpOPGreater); TEST_EQUAL(tr, *res_std, *res_simd); @@ -64,7 +64,7 @@ struct MinElementFuzzingTest {//unaligned input/ouput + predicate auto input(DataGenerator>(size, m_generator)); std::reverse(std::begin(input), std::end(input)); - auto res_std = std::min_element(cbegin(input), cend(input), cmpOPGreater); + auto res_std = std::min_element(input.cbegin(), input.cend(), cmpOPGreater); auto res_simd = simdpp::min_element(input.data(), input.data() + input.size(), cmpOPGreater); TEST_EQUAL(tr, *res_std, *res_simd); } diff --git a/test/insn/none_of.cc b/test/insn/none_of.cc index e33c06c6..89f5b453 100644 --- a/test/insn/none_of.cc +++ b/test/insn/none_of.cc @@ -45,27 +45,27 @@ struct NoneOffFuzzingTest {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); input[(size_t)(rand() % input.size())] = 10; - auto res_std = std::none_of(cbegin(input), cend(input), predEqualTen); + auto res_std = std::none_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } {//non aligned input/ouput auto input(DataGenerator>(size, m_generator)); input[(size_t)(rand() % input.size())] = 
10; - auto res_std = std::none_of(cbegin(input), cend(input), predEqualTen); + auto res_std = std::none_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } {//aligned input/ouput auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::none_of(cbegin(input), cend(input), predEqualTen); + auto res_std = std::none_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } {//non aligned input/ouput auto input(DataGenerator>(size, m_generator)); - auto res_std = std::none_of(cbegin(input), cend(input), predEqualTen); + auto res_std = std::none_of(input.cbegin(), input.cend(), predEqualTen); auto res_simd = simdpp::none_of(input.data(), input.data() + input.size(), predEqualTen); TEST_EQUAL(tr, res_std, res_simd); } diff --git a/test/insn/reduce.cc b/test/insn/reduce.cc index e3b48ad6..daa4ba6f 100644 --- a/test/insn/reduce.cc +++ b/test/insn/reduce.cc @@ -51,14 +51,14 @@ struct ReduceFuzzingTest { {//aligned input/ouput const auto input(DataGeneratorAligned>(size, m_generator)); - auto res_std = std::accumulate(cbegin(input), cend(input), init); + auto res_std = std::accumulate(input.cbegin(), input.cend(), init); //auto res_std=std::reduce(cbegin(input), cend(input),init); auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init); TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); } {//non aligned input/ouput const auto input(DataGenerator>(size, m_generator)); - auto res_std = std::accumulate(cbegin(input), cend(input), init); + auto res_std = std::accumulate(input.cbegin(), input.cend(), init); //auto res_std = std::reduce(cbegin(input), cend(input), init); auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init); 
TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); @@ -83,14 +83,14 @@ struct ReduceBinaryFuzzingTest {//aligned input/ouput const auto input(DataGeneratorAligned>(size, m_generator)); //auto res_std = std::reduce(cbegin(input), cend(input),init, opPlus); - auto res_std = std::accumulate(cbegin(input), cend(input), init, opPlus); + auto res_std = std::accumulate(input.cbegin(), input.cend(), init, opPlus); auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init, neutral, opPlus); TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); } {//non aligned input/ouput const auto input(DataGenerator>(size, m_generator)); //auto res_std = std::reduce(cbegin(input), cend(input), init, opPlus); - auto res_std = std::accumulate(cbegin(input), cend(input), init, opPlus); + auto res_std = std::accumulate(input.cbegin(), input.cend(), init, opPlus); auto res_simd = simdpp::reduce(input.data(), input.data() + input.size(), init, neutral, opPlus); TEST_EQUAL(tr, approximatelyEqual(res_std, res_simd, 10 * std::numeric_limits::epsilon()), true); } @@ -186,4 +186,4 @@ void test_reduce(TestResults& res, TestReporter& tr) test_reducebinop_type(ts, tr);*/ } -} \ No newline at end of file +} diff --git a/test/insn/transform.cc b/test/insn/transform.cc index 8f4b5f6a..adabb809 100644 --- a/test/insn/transform.cc +++ b/test/insn/transform.cc @@ -65,7 +65,7 @@ struct TransformUnaryFuzzingTest const auto input(DataGeneratorAligned>(size, m_generator)); std::vector::alignment>> expected(size); std::vector::alignment>> output(size); - std::transform(cbegin(input), cend(input), begin(expected), opPlusOne); + std::transform(input.cbegin(), input.cend(),expected.begin(), opPlusOne); simdpp::transform(input.data(), input.data() + input.size(), output.data(), opPlusOne); TEST_EQUAL_COLLECTIONS(tr, output, expected); } @@ -73,7 +73,7 @@ struct TransformUnaryFuzzingTest const auto 
input(DataGenerator>(size, m_generator)); std::vector expected(size); std::vector output(size); - std::transform(cbegin(input), cend(input), begin(expected), opPlusOne); + std::transform(input.cbegin(), input.cend(),expected.begin(), opPlusOne); simdpp::transform(input.data(), input.data() + input.size(), output.data(), opPlusOne); TEST_EQUAL_COLLECTIONS(tr, output, expected); } @@ -97,7 +97,7 @@ struct TransformBinaryFuzzingTest const auto input2(DataGeneratorAligned>(size, m_generator)); std::vector::alignment>> expected(size); std::vector::alignment>> output(size); - std::transform(cbegin(input1), cend(input1), cbegin(input2), begin(expected), opPlus); + std::transform(input1.cbegin(), input1.cend(), input2.cbegin(), expected.begin(), opPlus); simdpp::transform(input1.data(), input1.data() + input1.size(), input2.data(), output.data(), opPlus); TEST_EQUAL_COLLECTIONS(tr, output, expected); } @@ -106,7 +106,7 @@ struct TransformBinaryFuzzingTest const auto input2(DataGenerator>(size, m_generator)); std::vector expected(size); std::vector output(size); - std::transform(cbegin(input1), cend(input1), cbegin(input2), begin(expected), opPlus); + std::transform(input1.cbegin(), input1.cend(), input2.cbegin(), expected.begin(), opPlus); simdpp::transform(input1.data(), input1.data() + input1.size(), input2.data(), output.data(), opPlus); TEST_EQUAL_COLLECTIONS(tr, output, expected); } diff --git a/test/utils/test_helpers.h b/test/utils/test_helpers.h index 1bd15c85..bd6ce269 100644 --- a/test/utils/test_helpers.h +++ b/test/utils/test_helpers.h @@ -352,7 +352,8 @@ struct GeneratorRandom::val }; template -decltype(auto) DataGeneratorAligned(std::size_t size, Generator gen) +//decltype(auto) DataGeneratorAligned(std::size_t size, Generator gen) +std::vector::alignment>> DataGeneratorAligned(std::size_t size, Generator gen) { std::vector::alignment>> vect(size); std::generate(vect.begin(), vect.end(), gen); @@ -360,7 +361,8 @@ decltype(auto) 
DataGeneratorAligned(std::size_t size, Generator gen) } template -decltype(auto) DataGenerator(std::size_t size, Generator gen) +//decltype(auto) DataGenerator(std::size_t size, Generator gen) +std::vector DataGenerator(std::size_t size, Generator gen) { std::vector vect(size); std::generate(vect.begin(), vect.end(), gen); @@ -787,8 +789,8 @@ void test_cmp_equal_collections_impl(TestReporter& tr, tr.out() << " Container1 Size is:"< void test_cmp_equal_collections(TestReporter& tr, const Container1& a1, const Container2& a2, bool expected_equal, unsigned line, const char* file) { - static_assert(std::is_same::value, //TR to be relaxed for comparable types? + static_assert(std::is_same::value, //TR to be relaxed for comparable types? "Invalid types for comparison"); test_cmp_equal_collections_impl(tr, a1, a2, expected_equal,line, file); } From 9a3636a492734f28a65ae3fe0d736267930cce11 Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Wed, 11 Apr 2018 07:18:03 +0200 Subject: [PATCH 20/23] issue #107 * Try to fix visual 2013/2015 compilation issues * enforce const/inline and noexcept for predicate --- simdpp/algorithm/copy.h | 4 ++-- simdpp/algorithm/equal.h | 4 ++-- simdpp/algorithm/find.h | 4 ++-- simdpp/algorithm/lexicographical_compare.h | 4 ++-- simdpp/algorithm/replace.h | 4 ++-- simdpp/algorithm/replace_if.h | 4 ++-- simdpp/setup_arch.h | 6 ++++++ test/insn/all_of.cc | 4 ++-- test/insn/any_of.cc | 4 ++-- test/insn/count_if.cc | 4 ++-- test/insn/find_if.cc | 4 ++-- test/insn/find_if_not.cc | 4 ++-- test/insn/max.cc | 4 ++-- test/insn/max_element.cc | 4 ++-- test/insn/min.cc | 4 ++-- test/insn/min_element.cc | 4 ++-- test/insn/none_of.cc | 4 ++-- test/insn/reduce.cc | 4 ++-- test/insn/replace_if.cc | 4 ++-- test/insn/transform.cc | 8 ++++---- test/insn/transform_reduce.cc | 12 ++++++------ 21 files changed, 52 insertions(+), 46 deletions(-) diff --git a/simdpp/algorithm/copy.h b/simdpp/algorithm/copy.h index a826c748..9a2a3385 100644 --- 
a/simdpp/algorithm/copy.h +++ b/simdpp/algorithm/copy.h @@ -24,12 +24,12 @@ T* copy(T const* first, T const* last, T* out) struct UnaryOpCopy { using simd_type_T = typename simd_traits::simd_type; - SIMDPP_INL T operator()(T const &a) const noexcept + SIMDPP_INL T operator()(T const &a) const SIMDPP_NOEXCEPT { return a; } - SIMDPP_INL simd_type_T operator()(simd_type_T const &a) const noexcept + SIMDPP_INL simd_type_T operator()(simd_type_T const &a) const SIMDPP_NOEXCEPT { return a; } diff --git a/simdpp/algorithm/equal.h b/simdpp/algorithm/equal.h index ea1209ff..092c798b 100644 --- a/simdpp/algorithm/equal.h +++ b/simdpp/algorithm/equal.h @@ -89,8 +89,8 @@ bool equal(const T* first1, const T* last1, const T* first2) { using simd_type_T = typename simd_traits::simd_type; using simd_mask_T = typename simd_traits::simd_mask_type; - bool operator()(const T& a0,const T& a1) {return a0==a1;} - simd_mask_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return cmp_eq(a0,a1);} + SIMDPP_INL bool operator()(const T& a0,const T& a1) const SIMDPP_NOEXCEPT {return a0==a1;} + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a0,const simd_type_T& a1) const SIMDPP_NOEXCEPT {return cmp_eq(a0,a1);} }; return equal(first1,last1,first2,local_bynary_predicate_equal()); } diff --git a/simdpp/algorithm/find.h b/simdpp/algorithm/find.h index 663e8ef2..213f6dba 100644 --- a/simdpp/algorithm/find.h +++ b/simdpp/algorithm/find.h @@ -31,8 +31,8 @@ T const* find(T const* first, T const* last, U val) using simd_mask_T = typename simd_traits::simd_mask_type; using simd_type_T = typename simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } private: T m_val; simd_type_T m_val_simd; 
diff --git a/simdpp/algorithm/lexicographical_compare.h b/simdpp/algorithm/lexicographical_compare.h index 35dd05d1..0862a220 100644 --- a/simdpp/algorithm/lexicographical_compare.h +++ b/simdpp/algorithm/lexicographical_compare.h @@ -112,8 +112,8 @@ bool lexicographical_compare(const T* first1, const T* last1, const T* first2, c local_binary_predicate_less():on(splat(T(1))),off(splat(T(0))) {} - bool operator()(T a0,T a1) { return a0::simd_type; local_predicate(const T & old_val, const T & new_val) : m_old_val(old_val), m_new_val(new_val),m_old_val_simd(splat(old_val)), m_new_val_simd(splat(new_val)) {} - T operator()( const T& a) const { return a == m_old_val ? m_new_val : a;} - simd_type_T operator()(const simd_type_T& a) const { return blend(m_new_val_simd,a,cmp_eq(a,m_old_val_simd)); } + SIMDPP_INL T operator()( const T& a) const SIMDPP_NOEXCEPT { return a == m_old_val ? m_new_val : a;} + SIMDPP_INL simd_type_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return blend(m_new_val_simd,a,cmp_eq(a,m_old_val_simd)); } T m_old_val, m_new_val; simd_type_T m_old_val_simd, m_new_val_simd; diff --git a/simdpp/algorithm/replace_if.h b/simdpp/algorithm/replace_if.h index a592b63c..5c3f8b5e 100644 --- a/simdpp/algorithm/replace_if.h +++ b/simdpp/algorithm/replace_if.h @@ -26,8 +26,8 @@ void replace_if(T* first, T* last, UnaryPredicate pred , const T& new_val) using simd_type_T = typename simd_traits::simd_type; local_predicate(const UnaryPredicate& pred,const T & new_val) : m_new_val(new_val),m_new_val_simd(splat(new_val)),m_pred(pred) {} - T operator()( const T& a) const { return m_pred(a) ? m_new_val : a;} - simd_type_T operator()(const simd_type_T& a) const { return blend(m_new_val_simd,a,m_pred(a)); } + SIMDPP_INL T operator()( const T& a) const SIMDPP_NOEXCEPT { return m_pred(a) ? 
m_new_val : a;} + SIMDPP_INL simd_type_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return blend(m_new_val_simd,a,m_pred(a)); } T m_new_val; simd_type_T m_new_val_simd; diff --git a/simdpp/setup_arch.h b/simdpp/setup_arch.h index a644aa94..4cf9ab7c 100644 --- a/simdpp/setup_arch.h +++ b/simdpp/setup_arch.h @@ -390,6 +390,12 @@ #error "Unsupported compiler" #endif +#if _MSC_VER && _MSC_VER<=1800 +#define SIMDPP_NOEXCEPT +#else +#define SIMDPP_NOEXCEPT noexcept +#endif + #define SIMDPP_LIBRARY_VERSION_CXX11 1 #define SIMDPP_LIBRARY_VERSION_CXX98 0 diff --git a/test/insn/all_of.cc b/test/insn/all_of.cc index 262a4b71..e3f1bdbe 100644 --- a/test/insn/all_of.cc +++ b/test/insn/all_of.cc @@ -25,8 +25,8 @@ template using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } T m_val; simd_type_T m_val_simd; diff --git a/test/insn/any_of.cc b/test/insn/any_of.cc index 051804fc..4f8f329c 100644 --- a/test/insn/any_of.cc +++ b/test/insn/any_of.cc @@ -26,8 +26,8 @@ template using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } T m_val; simd_type_T m_val_simd; diff --git a/test/insn/count_if.cc b/test/insn/count_if.cc index 4cd58dc3..1adf0093 100644 --- 
a/test/insn/count_if.cc +++ b/test/insn/count_if.cc @@ -25,8 +25,8 @@ struct UnaryPredicateEqualValue using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } T m_val; simd_type_T m_val_simd; diff --git a/test/insn/find_if.cc b/test/insn/find_if.cc index 5d6e269f..dd24d4f2 100644 --- a/test/insn/find_if.cc +++ b/test/insn/find_if.cc @@ -23,8 +23,8 @@ struct UnaryPredicateSupValue using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a > m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_gt(a, m_val_simd); } + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a > m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_gt(a, m_val_simd); } T m_val; simd_type_T m_val_simd; diff --git a/test/insn/find_if_not.cc b/test/insn/find_if_not.cc index 693b2320..8501cad1 100644 --- a/test/insn/find_if_not.cc +++ b/test/insn/find_if_not.cc @@ -23,8 +23,8 @@ struct UnaryPredicateInfValue using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a < m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_lt(a, m_val_simd); } + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a < m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_lt(a, m_val_simd); } private: T m_val; simd_type_T m_val_simd; diff --git 
a/test/insn/max.cc b/test/insn/max.cc index 8b51bdad..d874e9ee 100644 --- a/test/insn/max.cc +++ b/test/insn/max.cc @@ -24,9 +24,9 @@ struct binary_cmp_greater using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - SIMDPP_INL bool operator()(T a, T b) { return a > b; } + SIMDPP_INL bool operator()(T a, T b) const SIMDPP_NOEXCEPT { return a > b; } - SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) const SIMDPP_NOEXCEPT { return simdpp::cmp_gt(a, b); } }; template diff --git a/test/insn/max_element.cc b/test/insn/max_element.cc index 684642a1..6d770887 100644 --- a/test/insn/max_element.cc +++ b/test/insn/max_element.cc @@ -24,9 +24,9 @@ struct binary_cmp_greater using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a, T b) { return a < b; } + SIMDPP_INL bool operator()(T a, T b) const SIMDPP_NOEXCEPT { return a < b; } - simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_lt(a, b); } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) const SIMDPP_NOEXCEPT { return simdpp::cmp_lt(a, b); } }; template diff --git a/test/insn/min.cc b/test/insn/min.cc index f4aec34e..34ef7662 100644 --- a/test/insn/min.cc +++ b/test/insn/min.cc @@ -24,9 +24,9 @@ struct binary_cmp_greater using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - SIMDPP_INL bool operator()(T a, T b) { return a > b; } + SIMDPP_INL bool operator()(T a, T b) const SIMDPP_NOEXCEPT { return a > b; } - SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_gt(a, b); } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const 
simd_type_T& b) const SIMDPP_NOEXCEPT { return simdpp::cmp_gt(a, b); } }; template diff --git a/test/insn/min_element.cc b/test/insn/min_element.cc index cbd8d117..f2a4d8ee 100644 --- a/test/insn/min_element.cc +++ b/test/insn/min_element.cc @@ -24,9 +24,9 @@ struct binary_cmp_greater using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - SIMDPP_INL bool operator()(T a, T b) { return a < b; } + SIMDPP_INL bool operator()(T a, T b) const SIMDPP_NOEXCEPT { return a < b; } - SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) { return simdpp::cmp_lt(a, b); } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a, const simd_type_T& b) const SIMDPP_NOEXCEPT { return simdpp::cmp_lt(a, b); } }; template diff --git a/test/insn/none_of.cc b/test/insn/none_of.cc index 89f5b453..e9869c66 100644 --- a/test/insn/none_of.cc +++ b/test/insn/none_of.cc @@ -25,8 +25,8 @@ template using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } T m_val; simd_type_T m_val_simd; diff --git a/test/insn/reduce.cc b/test/insn/reduce.cc index daa4ba6f..a170883b 100644 --- a/test/insn/reduce.cc +++ b/test/insn/reduce.cc @@ -21,13 +21,13 @@ struct BinaryOpPlus { public: BinaryOpPlus() {} - SIMDPP_INL T operator()(T const &a0, T const &a1) const noexcept + SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT { return a0 + a1; } template - SIMDPP_INL U operator()(U const &a0, U const &a1) const noexcept + SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT { return 
a0 + a1; } diff --git a/test/insn/replace_if.cc b/test/insn/replace_if.cc index ded3a280..d56a2a50 100644 --- a/test/insn/replace_if.cc +++ b/test/insn/replace_if.cc @@ -24,8 +24,8 @@ template using simd_mask_T = typename simdpp::simd_traits::simd_mask_type; using simd_type_T = typename simdpp::simd_traits::simd_type; - bool operator()(T a) const { return a == m_val; } - simd_mask_T operator()(const simd_type_T& a) const { return cmp_eq(a, m_val_simd); } + SIMDPP_INL bool operator()(T a) const SIMDPP_NOEXCEPT { return a == m_val; } + SIMDPP_INL simd_mask_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT { return cmp_eq(a, m_val_simd); } T m_val; simd_type_T m_val_simd; diff --git a/test/insn/transform.cc b/test/insn/transform.cc index adabb809..96eb8982 100644 --- a/test/insn/transform.cc +++ b/test/insn/transform.cc @@ -21,13 +21,13 @@ struct UnaryOpAddValue T m_val; public: UnaryOpAddValue(T val) :m_val(val) {} - SIMDPP_INL T operator()(T const &a) const noexcept + SIMDPP_INL T operator()(T const &a) const SIMDPP_NOEXCEPT { return m_val + a; } template - SIMDPP_INL U operator()(U const &a) const noexcept + SIMDPP_INL U operator()(U const &a) const SIMDPP_NOEXCEPT { return m_val + a; } @@ -38,13 +38,13 @@ struct BinaryOpAdd { public: BinaryOpAdd() {} - SIMDPP_INL T operator()(T const &a0, T const &a1) const noexcept + SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT { return a0 + a1; } template - SIMDPP_INL U operator()(U const &a0, U const &a1) const noexcept + SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT { using namespace simdpp; return a0 + a1; diff --git a/test/insn/transform_reduce.cc b/test/insn/transform_reduce.cc index 31529256..31a7a15b 100644 --- a/test/insn/transform_reduce.cc +++ b/test/insn/transform_reduce.cc @@ -20,24 +20,24 @@ template struct UnaryPredicateSquare { using simd_type_T = typename simd_traits::simd_type; - T operator()(T a) {return a*a;} - simd_type_T operator()(const 
simd_type_T& a) {return a*a;} + SIMDPP_INL T operator()(T a) const SIMDPP_NOEXCEPT {return a*a;} + SIMDPP_INL simd_type_T operator()(const simd_type_T& a) const SIMDPP_NOEXCEPT {return a*a;} }; template struct BinaryPredicatePlus { using simd_type_T = typename simd_traits::simd_type; - T operator()(T a0,T a1) {return a0 + a1;} - simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return a0+a1;} + SIMDPP_INL T operator()(T a0,T a1) const SIMDPP_NOEXCEPT {return a0 + a1;} + SIMDPP_INL simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) const SIMDPP_NOEXCEPT {return a0+a1;} }; template struct BinaryPredicateMul { using simd_type_T = typename simd_traits::simd_type; - T operator()(T a0,T a1) {return a0 * a1;} - simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) {return a0*a1;} + SIMDPP_INL T operator()(T a0,T a1) const SIMDPP_NOEXCEPT {return a0 * a1;} + SIMDPP_INL simd_type_T operator()(const simd_type_T& a0,const simd_type_T& a1) const SIMDPP_NOEXCEPT {return a0*a1;} }; //from https://stackoverflow.com/questions/17333/what-is-the-most-effective-way-for-float-and-double-comparison From 6ae2a4ab19e4551b3f45b6a3162dc85f487c8eda Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Mon, 16 Jul 2018 03:41:42 +0200 Subject: [PATCH 21/23] issue #115 Proof of concept * Add google benchmark as ExternalProject * Add three bench suite transform "unary", reduce "unary", load/store Todo: * strange behavior on transform bench suite. 
STD seems faster than SIMD on MSVC2017 <--- to be checked on gcc>5 * add other cases --- CMakeLists.txt | 5 + bench/CMakeLists.txt | 2 + bench/insn/CMakeLists.txt | 36 +++++ bench/insn/algorithm/reduce_unary.cc | 122 ++++++++++++++ bench/insn/algorithm/transform_unary.cc | 205 ++++++++++++++++++++++++ bench/insn/load_store.cc | 73 +++++++++ bench/insn/main.cc | 13 ++ bench/insn/main.h | 15 ++ bench/thirdparty/CMakeLists.txt | 38 +++++ simdpp/algorithm/helper_input_range.h | 2 +- simdpp/algorithm/transform.h | 5 +- test/insn/reduce.cc | 1 - 12 files changed, 513 insertions(+), 4 deletions(-) create mode 100644 bench/CMakeLists.txt create mode 100644 bench/insn/CMakeLists.txt create mode 100644 bench/insn/algorithm/reduce_unary.cc create mode 100644 bench/insn/algorithm/transform_unary.cc create mode 100644 bench/insn/load_store.cc create mode 100644 bench/insn/main.cc create mode 100644 bench/insn/main.h create mode 100644 bench/thirdparty/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index b12b124c..8202750f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,5 +57,10 @@ install(FILES enable_testing() +option(ENABLE_BENCH "Set to on in order to compile bench suite, work only in release mode" OFF) + add_subdirectory(simdpp) add_subdirectory(test) +if(ENABLE_BENCH) + add_subdirectory(bench) +endif() diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt new file mode 100644 index 00000000..53f393a9 --- /dev/null +++ b/bench/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(thirdparty) +add_subdirectory(insn) \ No newline at end of file diff --git a/bench/insn/CMakeLists.txt b/bench/insn/CMakeLists.txt new file mode 100644 index 00000000..c9050165 --- /dev/null +++ b/bench/insn/CMakeLists.txt @@ -0,0 +1,36 @@ +include_directories(${libsimdpp_SOURCE_DIR}) +include_directories(${GOOGLE_BENCHMARK_INCLUDE_DIRS}) + +set(TEST_BENCH_SOURCES + main.cc + main.h +) + +set(BENCH_INSN_ARCH_SOURCES + algorithm/transform_unary.cc + algorithm/reduce_unary.cc + 
load_store.cc + ) + +set_property(GLOBAL PROPERTY USE_FOLDERS ON) +foreach(ARCH ${COMPILABLE_ARCHS}}) + simdpp_get_arch_info(CXX_FLAGS DEFINES_LIST SUFFIX ${ARCH}) + #message("Create benchmark for arch : ${SUFFIX} with flags: ${CXX_FLAGS} with defines ${DEFINES_LIST}") + SET(exename "bench_insn_${SUFFIX}") + add_executable(${exename} ${BENCH_INSN_ARCH_SOURCES} ${TEST_BENCH_SOURCES}) + set_target_properties( ${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS}" ) + set_target_properties (${exename} PROPERTIES FOLDER bench) + if(WIN32) + target_link_libraries(${exename} + PUBLIC benchmark + PUBLIC shlwapi.lib + ) + else() + target_link_libraries(${exename} + PUBLIC benchmark + ) + add_dependencies(${exename} ${GOOGLE_BENCHMARK}) + endif() +endforeach(ARCH ${${COMPILABLE_ARCHS}}) + + diff --git a/bench/insn/algorithm/reduce_unary.cc b/bench/insn/algorithm/reduce_unary.cc new file mode 100644 index 00000000..de7546d8 --- /dev/null +++ b/bench/insn/algorithm/reduce_unary.cc @@ -0,0 +1,122 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "benchmark/benchmark.h" +#include +#include +#include +#include +//algorithm +#include + + +namespace { + +template +struct GeneratorConstant +{ + GeneratorConstant(T constant) { m_constant = constant; } + T operator()() { return m_constant; } + T m_constant; +}; + + +template +std::vector::alignment>> DataGenerator(std::size_t size, Generator gen) +{ + + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t input(size); + std::generate(input.begin(), input.end(), gen); + return input; +} + +/*********************UNARY****************************/ + +template +class ReduceUnaryFixture : public ::benchmark::Fixture { +public: + void SetUp(const ::benchmark::State& st) + { + m_inputvect = DataGenerator>((size_t)st.range(0), GeneratorConstant(1)); + } + void TearDown(const ::benchmark::State&) + { + m_inputvect.clear(); + } + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t m_inputvect; +}; + +//UINT64_T +BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(),(uint64_t)0)); + } +} +BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), (uint64_t)0)); + } +} +BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + + +//FLOAT +BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryFLOAT_SIMD_Test, 
float)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), (float)0)); + } +} +BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryFLOAT_STD_Test, float)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), (float)0)); + } +} +BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//DOUBLE +BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryDOUBLE_SIMD_Test, double)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), (double)0)); + } +} +BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryDOUBLE_STD_Test, double)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), (double)0)); + } +} +BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + + + +} // namespace \ No newline at end of file diff --git a/bench/insn/algorithm/transform_unary.cc b/bench/insn/algorithm/transform_unary.cc new file mode 100644 index 00000000..d4d42c33 --- /dev/null +++ b/bench/insn/algorithm/transform_unary.cc @@ -0,0 +1,205 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 
1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "benchmark/benchmark.h" +#include +#include +#include +#include +//algorithm +#include + + +namespace { + +template< typename T> +struct UnaryOpAddValue +{ + T m_val; +public: + UnaryOpAddValue(T val) :m_val(val) {} + SIMDPP_INL T operator()(T const &a) const SIMDPP_NOEXCEPT + { + return m_val + a; + } + + template + SIMDPP_INL U operator()(U const &a) const SIMDPP_NOEXCEPT + { + return simdpp::add(m_val,a); + } +}; + +template +struct GeneratorConstant +{ + GeneratorConstant(T constant) { m_constant = constant; } + T operator()() { return m_constant; } + T m_constant; +}; + + +template +std::vector::alignment>> DataGenerator(std::size_t size, Generator gen) +{ + + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t input(size); + std::generate(input.begin(), input.end(), gen); + return input; +} + +/*********************UNARY****************************/ + +template +class TransformUnaryFixture : public ::benchmark::Fixture { +public: + void SetUp(const ::benchmark::State& st) + { + m_inputvect = DataGenerator>((size_t)st.range(0), GeneratorConstant(42)); + m_outputvect.resize((size_t)st.range(0)); + } + void TearDown(const ::benchmark::State&) + { + m_inputvect.clear(); + m_outputvect.clear(); + } + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t m_inputvect; + vector_aligned_t m_outputvect; + UnaryOpAddValue opPlusOne= UnaryOpAddValue(1); +}; + +//UINT8_T +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne)); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT8_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + 
+BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT8_STD_Test, uint8_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne)); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT8_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//UINT16_T +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_STD_Test, uint16_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//UINT32_T +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_STD_Test, uint32_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), 
opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(31)->Arg(100)->Arg(1000)->Arg(10000); + +//UINT64_T +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//FLOAT +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_SIMD_Test, float)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + + +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_STD_Test, float)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//DOUBLE +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_SIMD_Test, double)(benchmark::State& st) +{ + const auto size= 
(size_t)st.range(0); + while (st.KeepRunning()) + { + simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_STD_Test, double)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + } +} +BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +} // namespace \ No newline at end of file diff --git a/bench/insn/load_store.cc b/bench/insn/load_store.cc new file mode 100644 index 00000000..aba568bc --- /dev/null +++ b/bench/insn/load_store.cc @@ -0,0 +1,73 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + + +#include "benchmark/benchmark.h" +#include +#include +#include +#include + +namespace { + + template + class LoadStoreFixture : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State& st) + { + m_inputvect.resize((size_t)st.range(0)); + std::fill(m_inputvect.begin(), m_inputvect.end(),(T)42); + m_outputvect.resize((size_t)st.range(0)); + } + void TearDown(const ::benchmark::State&) + { + m_inputvect.clear(); + m_outputvect.clear(); + } + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t m_inputvect; + vector_aligned_t m_outputvect; + }; + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT8_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT8_STD_Test, uint8_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++=*ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT8_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); +} // namespace diff --git a/bench/insn/main.cc b/bench/insn/main.cc new 
file mode 100644 index 00000000..d50df491 --- /dev/null +++ b/bench/insn/main.cc @@ -0,0 +1,13 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + + +#include "main.h" +#include "benchmark/benchmark.h" + +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/bench/insn/main.h b/bench/insn/main.h new file mode 100644 index 00000000..75f10597 --- /dev/null +++ b/bench/insn/main.h @@ -0,0 +1,15 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#ifndef SIMDPP_MAIN_INSN_BENCH_H +#define SIMDPP_MAIN_INSN_BENCH_H + +#include + +#endif //SIMDPP_MAIN_INSN_BENCH_H + diff --git a/bench/thirdparty/CMakeLists.txt b/bench/thirdparty/CMakeLists.txt new file mode 100644 index 00000000..1316a733 --- /dev/null +++ b/bench/thirdparty/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.0) + +#Adapted from https://github.com/dream3d/Dream3DSdkBuild/blob/master/Boost.cmake +message("Create ExternalProject google benchmark has thirdpartie lib") + +include(ExternalProject) +# clone approach +find_package(git REQUIRED) +ExternalProject_Add(googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG master + PREFIX ${CMAKE_CURRENT_BINARY_DIR} + #--Configure step------------- + CMAKE_ARGS + -DBUILD_TESTING:BOOL=OFF + -DBENCHMARK_ENABLE_TESTING:BOOL=OFF + -DBENCHMARK_ENABLE_LTO:BOOL=OFF + -DCMAKE_BUILD_TYPE=Release + #--Build step----------------- + UPDATE_COMMAND "" # Skip annoying updates for every build + #--Install step----------------- + INSTALL_COMMAND "" + ) + +ExternalProject_Get_Property(googlebenchmark TMP_DIR STAMP_DIR DOWNLOAD_DIR SOURCE_DIR BINARY_DIR 
INSTALL_DIR) +message("Build googlebenchmark src ${SOURCE_DIR} in ${BINARY_DIR}") +set_property(GLOBAL PROPERTY USE_FOLDERS ON) +set(GOOGLE_BENCHMARK_INCLUDE_DIRS "${SOURCE_DIR}/include") +set(GOOGLE_BENCHMARK_INCLUDE_DIRS ${GOOGLE_BENCHMARK_INCLUDE_DIRS} PARENT_SCOPE) +set(GOOGLE_BENCHMARK googlebenchmark PARENT_SCOPE) +set_target_properties (googlebenchmark PROPERTIES FOLDER bench) +add_library(benchmark STATIC IMPORTED GLOBAL) #MARK AS IMPORTED AND GLOBAL SCOPE +set_target_properties (benchmark PROPERTIES FOLDER bench) +if(WIN32) +set_target_properties(benchmark PROPERTIES IMPORTED_LOCATION ${BINARY_DIR}/src/Release/benchmark.lib) +else() +set_target_properties(benchmark PROPERTIES IMPORTED_LOCATION ${BINARY_DIR}/src/Release/benchmark.a) +endif() \ No newline at end of file diff --git a/simdpp/algorithm/helper_input_range.h b/simdpp/algorithm/helper_input_range.h index c4fd7670..19a649f7 100644 --- a/simdpp/algorithm/helper_input_range.h +++ b/simdpp/algorithm/helper_input_range.h @@ -28,7 +28,7 @@ Extract from contigous range [first,last[ - Note epilogue equals [size_simd_loop,stop[ */ template -const std::pair helper_input_range(const T* first, const T* last) +const std::pair SIMDPP_INL helper_input_range(const T* first, const T* last) { #ifndef SIMDPP_DEBUG //precondition debug mode if (!first) diff --git a/simdpp/algorithm/transform.h b/simdpp/algorithm/transform.h index 945c0e21..447f1619 100644 --- a/simdpp/algorithm/transform.h +++ b/simdpp/algorithm/transform.h @@ -21,6 +21,7 @@ #include #include #include +#include namespace simdpp { namespace SIMDPP_ARCH_NAMESPACE { @@ -53,7 +54,6 @@ U* transform(T const* first, T const* last, U* out, UnOp f) const auto size_prologue_loop = range.first; const auto size_simd_loop = range.second; - auto i = 0u; //---prologue @@ -61,6 +61,7 @@ U* transform(T const* first, T const* last, U* out, UnOp f) { *out++ = f(*first++); } + //---main simd loop if (detail::is_aligned(out, alignment)) { @@ -76,7 +77,7 @@ U* transform(T 
const* first, T const* last, U* out, UnOp f) { for (; i < size_simd_loop; i += simd_size) { - simd_type_T element = load(first); + simd_type_T element = load_u(first); store_u(out, f(element)); first += simd_size; out += simd_size; diff --git a/test/insn/reduce.cc b/test/insn/reduce.cc index a170883b..32bfe24a 100644 --- a/test/insn/reduce.cc +++ b/test/insn/reduce.cc @@ -12,7 +12,6 @@ Distributed under the Boost Software License, Version 1.0. #include #include #include -#include namespace SIMDPP_ARCH_NAMESPACE { From 5977ee9745eb7a97bb706b1244b51d0995478ddf Mon Sep 17 00:00:00 2001 From: RETORNAZ Date: Fri, 20 Jul 2018 14:43:55 +0200 Subject: [PATCH 22/23] #issue 115 linux/gcc fix --- bench/insn/CMakeLists.txt | 15 ++++++++++++++- bench/thirdparty/CMakeLists.txt | 8 ++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/bench/insn/CMakeLists.txt b/bench/insn/CMakeLists.txt index c9050165..4dbc7603 100644 --- a/bench/insn/CMakeLists.txt +++ b/bench/insn/CMakeLists.txt @@ -20,6 +20,18 @@ foreach(ARCH ${COMPILABLE_ARCHS}}) add_executable(${exename} ${BENCH_INSN_ARCH_SOURCES} ${TEST_BENCH_SOURCES}) set_target_properties( ${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS}" ) set_target_properties (${exename} PROPERTIES FOLDER bench) + if(SIMDPP_MSVC) + if(CMAKE_SIZEOF_VOID_P EQUAL 4) + # enable _vectorcall on i386 builds (only works on MSVC 2013) + set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Gv") + endif() + elseif(SIMDPP_MSVC_INTEL) + set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Qstd=c++11") + else() + # Xcode clang linker spends very long time in deduplication pass when + # linking the test executable unless -fvisibility-inlines-hidden is passed. 
+ set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "-std=c++11 -O2 -Wall -Wextra -fvisibility-inlines-hidden") + endif() if(WIN32) target_link_libraries(${exename} PUBLIC benchmark @@ -27,7 +39,8 @@ foreach(ARCH ${COMPILABLE_ARCHS}}) ) else() target_link_libraries(${exename} - PUBLIC benchmark + PUBLIC benchmark + PUBLIC pthread ) add_dependencies(${exename} ${GOOGLE_BENCHMARK}) endif() diff --git a/bench/thirdparty/CMakeLists.txt b/bench/thirdparty/CMakeLists.txt index 1316a733..9166fe08 100644 --- a/bench/thirdparty/CMakeLists.txt +++ b/bench/thirdparty/CMakeLists.txt @@ -1,11 +1,11 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 2.8.0) #Adapted from https://github.com/dream3d/Dream3DSdkBuild/blob/master/Boost.cmake message("Create ExternalProject google benchmark has thirdpartie lib") include(ExternalProject) # clone approach -find_package(git REQUIRED) +find_package(git QUIET) ExternalProject_Add(googlebenchmark GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG master @@ -34,5 +34,5 @@ set_target_properties (benchmark PROPERTIES FOLDER bench) if(WIN32) set_target_properties(benchmark PROPERTIES IMPORTED_LOCATION ${BINARY_DIR}/src/Release/benchmark.lib) else() -set_target_properties(benchmark PROPERTIES IMPORTED_LOCATION ${BINARY_DIR}/src/Release/benchmark.a) -endif() \ No newline at end of file +set_target_properties(benchmark PROPERTIES IMPORTED_LOCATION ${BINARY_DIR}/src/libbenchmark.a) +endif() From e28651968e2c6805d20f6d22e415e4da3b376d74 Mon Sep 17 00:00:00 2001 From: thomas retornaz Date: Fri, 26 Oct 2018 20:39:00 +0200 Subject: [PATCH 23/23] * warn -- * add binary flavor of transform reduce bench --- bench/insn/CMakeLists.txt | 4 +- bench/insn/algorithm/reduce_binary.cc | 155 +++++++++++++++++ bench/insn/algorithm/transform_binary.cc | 209 +++++++++++++++++++++++ bench/insn/algorithm/transform_unary.cc | 22 +-- bench/insn/load_store.cc | 191 +++++++++++++++++++++ simdpp/algorithm/helper_input_range.h | 2 +- 
simdpp/algorithm/reduce.h | 2 +- simdpp/algorithm/transform.h | 4 +- test/insn/transform.cc | 4 +- 9 files changed, 575 insertions(+), 18 deletions(-) create mode 100644 bench/insn/algorithm/reduce_binary.cc create mode 100644 bench/insn/algorithm/transform_binary.cc diff --git a/bench/insn/CMakeLists.txt b/bench/insn/CMakeLists.txt index 4dbc7603..01c3b299 100644 --- a/bench/insn/CMakeLists.txt +++ b/bench/insn/CMakeLists.txt @@ -8,7 +8,9 @@ set(TEST_BENCH_SOURCES set(BENCH_INSN_ARCH_SOURCES algorithm/transform_unary.cc + algorithm/transform_binary.cc algorithm/reduce_unary.cc + algorithm/reduce_binary.cc load_store.cc ) @@ -23,7 +25,7 @@ foreach(ARCH ${COMPILABLE_ARCHS}}) if(SIMDPP_MSVC) if(CMAKE_SIZEOF_VOID_P EQUAL 4) # enable _vectorcall on i386 builds (only works on MSVC 2013) - set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Gv") + #set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Gv") endif() elseif(SIMDPP_MSVC_INTEL) set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Qstd=c++11") diff --git a/bench/insn/algorithm/reduce_binary.cc b/bench/insn/algorithm/reduce_binary.cc new file mode 100644 index 00000000..82c0eb74 --- /dev/null +++ b/bench/insn/algorithm/reduce_binary.cc @@ -0,0 +1,155 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "benchmark/benchmark.h" +#include +#include +#include +#include +//algorithm +#include + + +namespace { + +template< typename T> +struct BinaryOpPlus +{ +public: + BinaryOpPlus() {} + SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT + { + return a0 + a1; + } + + template + SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT + { + return a0 + a1; + } +}; + +template +struct GeneratorConstant +{ + GeneratorConstant(T constant) { m_constant = constant; } + T operator()() { return m_constant; } + T m_constant; +}; + + +template +std::vector::alignment>> DataGenerator(std::size_t size, Generator gen) +{ + + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t input(size); + std::generate(input.begin(), input.end(), gen); + return input; +} + +/*********************UNARY****************************/ + +template +class ReduceBinaryFixture : public ::benchmark::Fixture { +public: + void SetUp(const ::benchmark::State& st) + { + m_inputvect = DataGenerator>((size_t)st.range(0), GeneratorConstant(1)); + } + void TearDown(const ::benchmark::State&) + { + m_inputvect.clear(); + } + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t m_inputvect; +}; + +//UINT64_T +BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + uint64_t init = (uint64_t)0; + auto opPlus = BinaryOpPlus(); + uint64_t neutral = (uint64_t)0; + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus)); + } +} +BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test, 
uint64_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + uint64_t init = (uint64_t)0; + auto opPlus = BinaryOpPlus(); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus)); + } +} +BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + + +//FLOAT +BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test, float)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + float init = (float)0; + auto opPlus = BinaryOpPlus(); + float neutral = (float)0; + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus)); + } +} +BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test, float)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + float init = (float)0; + auto opPlus = BinaryOpPlus(); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus)); + } +} +BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + + +//DOUBLE +BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test, double)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + double init = (double)0; + auto opPlus = BinaryOpPlus(); + double neutral = (double)0; + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus)); + } +} +BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + 
+BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test, double)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + double init = (double)0; + auto opPlus = BinaryOpPlus(); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus)); + } +} +BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + + + +} // namespace \ No newline at end of file diff --git a/bench/insn/algorithm/transform_binary.cc b/bench/insn/algorithm/transform_binary.cc new file mode 100644 index 00000000..5ffe1a6e --- /dev/null +++ b/bench/insn/algorithm/transform_binary.cc @@ -0,0 +1,209 @@ +/* Copyright (C) 2018 Povilas Kanapickas + Copyright (C) 2018 Thomas Retornaz + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +#include "benchmark/benchmark.h" +#include +#include +#include +#include +//algorithm +#include + + +namespace { + +template< typename T> +struct BinaryOpAdd +{ +public: + BinaryOpAdd() {} + SIMDPP_INL T operator()(T const &a0, T const &a1) const SIMDPP_NOEXCEPT + { + return a0 + a1; + } + + template + SIMDPP_INL U operator()(U const &a0, U const &a1) const SIMDPP_NOEXCEPT + { + using namespace simdpp; + return a0 + a1; + } +}; + + +template +struct GeneratorConstant +{ + GeneratorConstant(T constant) { m_constant = constant; } + T operator()() { return m_constant; } + T m_constant; +}; + + +template +std::vector::alignment>> DataGenerator(std::size_t size, Generator gen) +{ + + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t input(size); + std::generate(input.begin(), input.end(), gen); + return input; +} + +/*********************Binary****************************/ + +template +class TransformBinaryFixture : public ::benchmark::Fixture { +public: + void SetUp(const 
::benchmark::State& st) + { + m_inputvect = DataGenerator>((size_t)st.range(0), GeneratorConstant(42)); + m_inputvect2 = DataGenerator>((size_t)st.range(0), GeneratorConstant(42)); + m_outputvect.resize((size_t)st.range(0)); + } + void TearDown(const ::benchmark::State&) + { + m_inputvect.clear(); + m_inputvect2.clear(); + m_outputvect.clear(); + } + using vector_aligned_t = std::vector::alignment>>; + vector_aligned_t m_inputvect; + vector_aligned_t m_inputvect2; + vector_aligned_t m_outputvect; + BinaryOpAdd opPlus= BinaryOpAdd(); +}; + +//UINT8_T +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(),m_inputvect2.data(),m_outputvect.data(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT8_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT8_STD_Test, uint8_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect.begin(),m_outputvect.begin(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT8_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//UINT16_T +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + 
+BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT16_STD_Test, uint16_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect.begin(), m_outputvect.begin(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//UINT32_T +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT32_STD_Test, uint32_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect.begin(), m_outputvect.begin(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(31)->Arg(100)->Arg(1000)->Arg(10000); + +//UINT64_T +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT64_STD_Test, 
uint64_t)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect.begin(), m_outputvect.begin(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//FLOAT +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryFloat_SIMD_Test, float)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + + +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryFloat_STD_Test, float)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect.begin(), m_outputvect.begin(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +//DOUBLE +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryDouble_SIMD_Test, double)(benchmark::State& st) +{ + const auto size= (size_t)st.range(0); + while (st.KeepRunning()) + { + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryDouble_STD_Test, double)(benchmark::State& st) +{ + const auto size = (size_t)st.range(0); + while (st.KeepRunning()) + { + 
benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect.begin(), m_outputvect.begin(), opPlus)); + } +} +BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); + +} // namespace \ No newline at end of file diff --git a/bench/insn/algorithm/transform_unary.cc b/bench/insn/algorithm/transform_unary.cc index d4d42c33..b0508f09 100644 --- a/bench/insn/algorithm/transform_unary.cc +++ b/bench/insn/algorithm/transform_unary.cc @@ -26,7 +26,7 @@ struct UnaryOpAddValue SIMDPP_INL T operator()(T const &a) const SIMDPP_NOEXCEPT { return m_val + a; - } + } template SIMDPP_INL U operator()(U const &a) const SIMDPP_NOEXCEPT @@ -102,7 +102,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test, uint1 const auto size= (size_t)st.range(0); while (st.KeepRunning()) { - simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); @@ -112,7 +112,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_STD_Test, uint16 const auto size = (size_t)st.range(0); while (st.KeepRunning()) { - std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); @@ -123,7 +123,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test, uint3 const auto size= (size_t)st.range(0); while (st.KeepRunning()) { - simdpp::transform(m_inputvect.data(), 
m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); @@ -133,7 +133,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_STD_Test, uint32 const auto size = (size_t)st.range(0); while (st.KeepRunning()) { - std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(31)->Arg(100)->Arg(1000)->Arg(10000); @@ -144,7 +144,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test, uint6 const auto size= (size_t)st.range(0); while (st.KeepRunning()) { - simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); @@ -154,7 +154,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_STD_Test, uint64 const auto size = (size_t)st.range(0); while (st.KeepRunning()) { - std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); @@ -165,7 +165,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, 
UnaryFloat_SIMD_Test, float)( const auto size = (size_t)st.range(0); while (st.KeepRunning()) { - simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); @@ -176,7 +176,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_STD_Test, float)(b const auto size = (size_t)st.range(0); while (st.KeepRunning()) { - std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); @@ -187,7 +187,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_SIMD_Test, double const auto size= (size_t)st.range(0); while (st.KeepRunning()) { - simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne); + benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); @@ -197,7 +197,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_STD_Test, double) const auto size = (size_t)st.range(0); while (st.KeepRunning()) { - std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne); + benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne)); } } BENCHMARK_REGISTER_F(TransformUnaryFixture, 
UnaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000); diff --git a/bench/insn/load_store.cc b/bench/insn/load_store.cc index aba568bc..5a245dd7 100644 --- a/bench/insn/load_store.cc +++ b/bench/insn/load_store.cc @@ -34,6 +34,7 @@ namespace { vector_aligned_t m_outputvect; }; + //UINT8_T BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st) { const auto size = (size_t)st.range(0); @@ -70,4 +71,194 @@ namespace { } } BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT8_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //UINT16 + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT16_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT16_STD_Test, uint16_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT16_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //UINT32 + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st) + { + const auto 
size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT32_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT32_STD_Test, uint32_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT32_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //UINT64 + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT64_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename 
simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT64_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //FLOAT + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryFloat_SIMD_Test, float)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryFloat_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryFloat_STD_Test, float)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryFloat_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + //DOUBLE + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryDouble_SIMD_Test, double)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = 
m_outputvect.data(); + for (size_t i = 0; i < size; i += simd_size) + { + simd_type_T element = simdpp::load(ptrin); + simdpp::store(ptrout, element); + ptrin += simd_size; + ptrout += simd_size; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryDouble_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); + + BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryDouble_STD_Test, double)(benchmark::State& st) + { + const auto size = (size_t)st.range(0); + using simd_type_T = typename simdpp::simd_traits::simd_type; + const auto simd_size = simd_type_T::base_length; + while (st.KeepRunning()) + { + const auto* ptrin = m_inputvect.data(); + auto* ptrout = m_outputvect.data(); + for (size_t i = 0; i < size; ++i) + { + *ptrout++ = *ptrin++; + } + } + } + BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryDouble_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); } // namespace diff --git a/simdpp/algorithm/helper_input_range.h b/simdpp/algorithm/helper_input_range.h index 19a649f7..fe9fe2c4 100644 --- a/simdpp/algorithm/helper_input_range.h +++ b/simdpp/algorithm/helper_input_range.h @@ -28,7 +28,7 @@ Extract from contigous range [first,last[ - Note epilogue equals [size_simd_loop,stop[ */ template -const std::pair SIMDPP_INL helper_input_range(const T* first, const T* last) +const std::pair SIMDPP_INL helper_input_range(const T* first, const T* last) { #ifndef SIMDPP_DEBUG //precondition debug mode if (!first) diff --git a/simdpp/algorithm/reduce.h b/simdpp/algorithm/reduce.h index 86523e79..04b87eb9 100644 --- a/simdpp/algorithm/reduce.h +++ b/simdpp/algorithm/reduce.h @@ -92,7 +92,7 @@ T reduce(T const* first, T const* last, T init, T neutral, BinOp f) //need neutr const auto size_prologue_loop = range.first; const auto size_simd_loop = range.second; - auto i = 0u; + auto i = 0; simd_type_T accusimd = splat(T(neutral)); //think about product sum //---prologue diff --git a/simdpp/algorithm/transform.h b/simdpp/algorithm/transform.h index 
447f1619..af78e85c 100644 --- a/simdpp/algorithm/transform.h +++ b/simdpp/algorithm/transform.h @@ -54,7 +54,7 @@ U* transform(T const* first, T const* last, U* out, UnOp f) const auto size_prologue_loop = range.first; const auto size_simd_loop = range.second; - auto i = 0u; + auto i = 0; //---prologue for (; i < size_prologue_loop; ++i) @@ -127,7 +127,7 @@ U* transform(T1 const* first1, T1 const* last1, T2 const* first2, U* out, BinOp const auto size_simd_loop = range.second; - auto i = 0u; + auto i = 0; //---prologue for (; i < size_prologue_loop; ++i) diff --git a/test/insn/transform.cc b/test/insn/transform.cc index 96eb8982..a0685001 100644 --- a/test/insn/transform.cc +++ b/test/insn/transform.cc @@ -159,7 +159,7 @@ void test_transform_type_binary(TestResultsSet& ts, TestReporter& tr) vector_t expected = { 1,3 }; transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + for (auto i = 0u; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS { TEST_EQUAL(tr, expected[i], ovect[i]); } @@ -171,7 +171,7 @@ void test_transform_type_binary(TestResultsSet& ts, TestReporter& tr) vector_t expected(150, 1); transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus); - for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS + for (auto i = 0u; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS { TEST_EQUAL(tr, expected[i], ovect[i]); }