diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h index 0d7cde6503de10..8afb24797ef022 100644 --- a/be/src/vec/common/pod_array.h +++ b/be/src/vec/common/pod_array.h @@ -389,12 +389,12 @@ class PODArray : public PODArrayBase void add_num_element(U&& x, uint32_t num, TAllocatorParams&&... allocator_params) { if (num != 0) { - const auto new_end = this->c_end + this->byte_size(num); - if (UNLIKELY(new_end > this->c_end_of_storage)) { + const auto growth_size = this->byte_size(num); + if (UNLIKELY(this->c_end + growth_size > this->c_end_of_storage)) { this->reserve(this->size() + num); } std::fill(t_end(), t_end() + num, x); - this->c_end = new_end; + this->c_end = this->c_end + growth_size; } } @@ -420,7 +420,9 @@ class PODArray : public PODArrayBase void emplace_back(Args&&... args) { - if (UNLIKELY(this->c_end == this->c_end_of_storage)) this->reserve_for_next_size(); + if (UNLIKELY(this->c_end + sizeof(T) > this->c_end_of_storage)) { + this->reserve_for_next_size(); + } new (t_end()) T(std::forward(args)...); this->c_end += this->byte_size(1); @@ -457,22 +459,6 @@ class PODArray : public PODArrayBasec_end += bytes_to_copy; } - template - void insert(iterator it, It1 from_begin, It2 from_end) { - insert_prepare(from_begin, from_end); - - size_t bytes_to_copy = this->byte_size(from_end - from_begin); - size_t bytes_to_move = (end() - it) * sizeof(T); - - if (UNLIKELY(bytes_to_move)) - memcpy(this->c_end + bytes_to_copy - bytes_to_move, this->c_end - bytes_to_move, - bytes_to_move); - - memcpy(this->c_end - bytes_to_move, reinterpret_cast(&*from_begin), - bytes_to_copy); - this->c_end += bytes_to_copy; - } - template void insert_assume_reserved(It1 from_begin, It2 from_end) { size_t bytes_to_copy = this->byte_size(from_end - from_begin); diff --git a/be/test/vec/common/pod_array_test.cpp b/be/test/vec/common/pod_array_test.cpp new file mode 100644 index 00000000000000..0a136008e71c18 --- /dev/null +++ b/be/test/vec/common/pod_array_test.cpp @@ -0,0 +1,601 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/tests/gtest_pod_array.cpp +// and modified by Doris + +#include "vec/common/pod_array.h" + +#include + +#include "vec/common/allocator_fwd.h" + +namespace doris { + +TEST(PODArrayTest, PODArrayBasicMove) { + static constexpr size_t initial_bytes = 32; + using Array = vectorized::PODArray, initial_bytes>>; + + { + Array arr; + Array arr2; + arr2 = std::move(arr); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + + Array arr2; + + arr2 = std::move(arr); + + ASSERT_EQ(arr2.size(), 3); + ASSERT_EQ(arr2[0], 1); + ASSERT_EQ(arr2[1], 2); + ASSERT_EQ(arr2[2], 3); + + arr = std::move(arr2); + + ASSERT_EQ(arr.size(), 3); + ASSERT_EQ(arr[0], 1); + ASSERT_EQ(arr[1], 2); + ASSERT_EQ(arr[2], 3); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + arr.push_back(4); + arr.push_back(5); + + Array arr2; + + arr2 = std::move(arr); + + ASSERT_EQ(arr2.size(), 5); + ASSERT_EQ(arr2[0], 1); + ASSERT_EQ(arr2[1], 2); + ASSERT_EQ(arr2[2], 3); + ASSERT_EQ(arr2[3], 4); + ASSERT_EQ(arr2[4], 5); + + arr = std::move(arr2); + + ASSERT_EQ(arr.size(), 5); + ASSERT_EQ(arr[0], 1); + ASSERT_EQ(arr[1], 2); + ASSERT_EQ(arr[2], 3); + ASSERT_EQ(arr[3], 4); + ASSERT_EQ(arr[4], 5); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + + Array arr2; + + arr2.push_back(4); + arr2.push_back(5); + arr2.push_back(6); + arr2.push_back(7); + + arr2 = std::move(arr); + + ASSERT_EQ(arr2.size(), 3); + ASSERT_EQ(arr2[0], 1); + ASSERT_EQ(arr2[1], 2); + ASSERT_EQ(arr2[2], 3); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + + Array arr2; + + arr2.push_back(4); + arr2.push_back(5); + arr2.push_back(6); + arr2.push_back(7); + arr2.push_back(8); + + arr = std::move(arr2); + + ASSERT_EQ(arr.size(), 5); + ASSERT_EQ(arr[0], 4); + ASSERT_EQ(arr[1], 5); + ASSERT_EQ(arr[2], 6); + ASSERT_EQ(arr[3], 7); + ASSERT_EQ(arr[4], 8); + } +} + +TEST(PODArrayTest, PODArrayBasicSwap) { + static constexpr size_t initial_bytes = 32; + using Array = vectorized::PODArray, initial_bytes>>; + + { + Array arr; + Array arr2; + arr.swap(arr2); + arr2.swap(arr); + } + + { + Array arr; + + Array arr2; + + arr2.push_back(1); + arr2.push_back(2); + arr2.push_back(3); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 3); + ASSERT_TRUE(arr[0] == 1); + ASSERT_TRUE(arr[1] == 2); + ASSERT_TRUE(arr[2] == 3); + + ASSERT_TRUE(arr2.empty()); + + arr.swap(arr2); + + ASSERT_TRUE(arr.empty()); + + ASSERT_TRUE(arr2.size() == 3); + ASSERT_TRUE(arr2[0] == 1); + ASSERT_TRUE(arr2[1] == 2); + ASSERT_TRUE(arr2[2] == 3); + } + + { + Array arr; + + Array arr2; + + arr2.push_back(1); + arr2.push_back(2); + arr2.push_back(3); + arr2.push_back(4); + arr2.push_back(5); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 5); + ASSERT_TRUE(arr[0] == 1); + ASSERT_TRUE(arr[1] == 2); + ASSERT_TRUE(arr[2] == 3); + ASSERT_TRUE(arr[3] == 4); + ASSERT_TRUE(arr[4] == 5); + + ASSERT_TRUE(arr2.empty()); + + arr.swap(arr2); + + ASSERT_TRUE(arr.empty()); + + ASSERT_TRUE(arr2.size() == 5); + ASSERT_TRUE(arr2[0] == 1); + ASSERT_TRUE(arr2[1] == 2); + ASSERT_TRUE(arr2[2] == 3); + ASSERT_TRUE(arr2[3] == 4); + ASSERT_TRUE(arr2[4] == 5); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + + Array arr2; + + arr2.push_back(4); + arr2.push_back(5); + arr2.push_back(6); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 3); + ASSERT_TRUE(arr[0] == 4); + ASSERT_TRUE(arr[1] == 5); + ASSERT_TRUE(arr[2] == 6); + + ASSERT_TRUE(arr2.size() == 3); + ASSERT_TRUE(arr2[0] == 1); + ASSERT_TRUE(arr2[1] == 2); + ASSERT_TRUE(arr2[2] == 3); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 3); + ASSERT_TRUE(arr[0] == 1); + ASSERT_TRUE(arr[1] == 2); + ASSERT_TRUE(arr[2] == 3); + + ASSERT_TRUE(arr2.size() == 3); + ASSERT_TRUE(arr2[0] == 4); + ASSERT_TRUE(arr2[1] == 5); + ASSERT_TRUE(arr2[2] == 6); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + + Array arr2; + + arr2.push_back(3); + arr2.push_back(4); + arr2.push_back(5); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 3); + ASSERT_TRUE(arr[0] == 3); + ASSERT_TRUE(arr[1] == 4); + ASSERT_TRUE(arr[2] == 5); + + ASSERT_TRUE(arr2.size() == 2); + ASSERT_TRUE(arr2[0] == 1); + ASSERT_TRUE(arr2[1] == 2); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 2); + ASSERT_TRUE(arr[0] == 1); + ASSERT_TRUE(arr[1] == 2); + + ASSERT_TRUE(arr2.size() == 3); + ASSERT_TRUE(arr2[0] == 3); + ASSERT_TRUE(arr2[1] == 4); + ASSERT_TRUE(arr2[2] == 5); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + + Array arr2; + + arr2.push_back(4); + arr2.push_back(5); + arr2.push_back(6); + arr2.push_back(7); + arr2.push_back(8); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 5); + ASSERT_TRUE(arr[0] == 4); + ASSERT_TRUE(arr[1] == 5); + ASSERT_TRUE(arr[2] == 6); + ASSERT_TRUE(arr[3] == 7); + ASSERT_TRUE(arr[4] == 8); + + ASSERT_TRUE(arr2.size() == 3); + ASSERT_TRUE(arr2[0] == 1); + ASSERT_TRUE(arr2[1] == 2); + ASSERT_TRUE(arr2[2] == 3); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 3); + ASSERT_TRUE(arr[0] == 1); + ASSERT_TRUE(arr[1] == 2); + ASSERT_TRUE(arr[2] == 3); + + ASSERT_TRUE(arr2.size() == 5); + ASSERT_TRUE(arr2[0] == 4); + ASSERT_TRUE(arr2[1] == 5); + ASSERT_TRUE(arr2[2] == 6); + ASSERT_TRUE(arr2[3] == 7); + ASSERT_TRUE(arr2[4] == 8); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + arr.push_back(4); + arr.push_back(5); + + Array arr2; + + arr2.push_back(6); + arr2.push_back(7); + arr2.push_back(8); + arr2.push_back(9); + arr2.push_back(10); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 5); + ASSERT_TRUE(arr[0] == 6); + ASSERT_TRUE(arr[1] == 7); + ASSERT_TRUE(arr[2] == 8); + ASSERT_TRUE(arr[3] == 9); + ASSERT_TRUE(arr[4] == 10); + + ASSERT_TRUE(arr2.size() == 5); + ASSERT_TRUE(arr2[0] == 1); + ASSERT_TRUE(arr2[1] == 2); + ASSERT_TRUE(arr2[2] == 3); + ASSERT_TRUE(arr2[3] == 4); + ASSERT_TRUE(arr2[4] == 5); + + arr.swap(arr2); + + ASSERT_TRUE(arr.size() == 5); + ASSERT_TRUE(arr[0] == 1); + ASSERT_TRUE(arr[1] == 2); + ASSERT_TRUE(arr[2] == 3); + ASSERT_TRUE(arr[3] == 4); + ASSERT_TRUE(arr[4] == 5); + + ASSERT_TRUE(arr2.size() == 5); + ASSERT_TRUE(arr2[0] == 6); + ASSERT_TRUE(arr2[1] == 7); + ASSERT_TRUE(arr2[2] == 8); + ASSERT_TRUE(arr2[3] == 9); + ASSERT_TRUE(arr2[4] == 10); + } +} + +TEST(PODArrayTest, PODArrayBasicSwapMoveConstructor) { + static constexpr size_t initial_bytes = 32; + using Array = vectorized::PODArray, initial_bytes>>; + + { + Array arr; + Array arr2 {std::move(arr)}; + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + + Array arr2 {std::move(arr)}; + + ASSERT_TRUE(arr.empty()); // NOLINT + + ASSERT_TRUE(arr2.size() == 3); + ASSERT_TRUE(arr2[0] == 1); + ASSERT_TRUE(arr2[1] == 2); + ASSERT_TRUE(arr2[2] == 3); + } + + { + Array arr; + + arr.push_back(1); + arr.push_back(2); + arr.push_back(3); + arr.push_back(4); + arr.push_back(5); + + Array arr2 {std::move(arr)}; + + ASSERT_TRUE(arr.empty()); // NOLINT + + ASSERT_TRUE(arr2.size() == 5); + ASSERT_TRUE(arr2[0] == 1); + ASSERT_TRUE(arr2[1] == 2); + ASSERT_TRUE(arr2[2] == 3); + ASSERT_TRUE(arr2[3] == 4); + ASSERT_TRUE(arr2[4] == 5); + } +} + +// TEST(PODArrayTest, PODArrayInsert) { +// { +// std::string str = "test_string_abacaba"; +// vectorized::PODArray chars; +// chars.insert(chars.end(), str.begin(), str.end()); +// EXPECT_EQ(str, std::string(chars.data(), chars.size())); + +// std::string insert_in_the_middle = "insert_in_the_middle"; +// auto pos = str.size() / 2; +// str.insert(str.begin() + pos, insert_in_the_middle.begin(), insert_in_the_middle.end()); +// chars.insert(chars.begin() + pos, insert_in_the_middle.begin(), insert_in_the_middle.end()); +// EXPECT_EQ(str, std::string(chars.data(), chars.size())); + +// std::string insert_with_resize; +// insert_with_resize.reserve(chars.capacity() * 2); +// char cur_char = 'a'; +// while (insert_with_resize.size() < insert_with_resize.capacity()) { +// insert_with_resize += cur_char; +// if (cur_char == 'z') { +// cur_char = 'a'; +// } else { +// ++cur_char; +// } +// } +// str.insert(str.begin(), insert_with_resize.begin(), insert_with_resize.end()); +// chars.insert(chars.begin(), insert_with_resize.begin(), insert_with_resize.end()); +// EXPECT_EQ(str, std::string(chars.data(), chars.size())); +// } +// { +// vectorized::PODArray values; +// vectorized::PODArray values_to_insert; + +// for (size_t i = 0; i < 120; ++i) { +// values.emplace_back(i); +// } + +// values.insert(values.begin() + 1, values_to_insert.begin(), values_to_insert.end()); +// ASSERT_EQ(values.size(), 120); + +// values_to_insert.emplace_back(0); +// values_to_insert.emplace_back(1); + +// values.insert(values.begin() + 1, values_to_insert.begin(), values_to_insert.end()); +// ASSERT_EQ(values.size(), 122); + +// values_to_insert.clear(); +// for (size_t i = 0; i < 240; ++i) { +// values_to_insert.emplace_back(i); +// } + +// values.insert(values.begin() + 1, values_to_insert.begin(), values_to_insert.end()); +// ASSERT_EQ(values.size(), 362); +// } +// } + +// TEST(PODArrayTest, PODArrayInsertFromItself) +// { +// { +// vectorized::PaddedPODArray array { 1 }; + +// for (size_t i = 0; i < 3; ++i) +// array.insertFromItself(array.begin(), array.end()); + +// vectorized::PaddedPODArray expected {1,1,1,1,1,1,1,1}; +// ASSERT_EQ(array,expected); +// } +// } + +TEST(PODArrayTest, PODArrayAddNumElement) { + static constexpr size_t initial_bytes = 32; + using Array = vectorized::PODArray; + size_t element_size = 8; // sizeof(uint64_t) + { + Array array; + + array.add_num_element(1, 4); + ASSERT_EQ(array.size(), 4); + ASSERT_EQ(array.capacity(), 32 / element_size); + ASSERT_EQ(array, Array({1, 1, 1, 1})); + + // call reserve + array.add_num_element(1, 2); + ASSERT_EQ(array.size(), 6); + ASSERT_EQ(array.capacity(), 64 / element_size); + ASSERT_EQ(array, Array({1, 1, 1, 1, 1, 1})); + + // call reserve + array.add_num_element_without_reserve(1, 1); + ASSERT_EQ(array.size(), 7); + ASSERT_EQ(array.capacity(), 64 / element_size); + ASSERT_EQ(array, Array({1, 1, 1, 1, 1, 1, 1})); + } +} + +TEST(PODArrayTest, PODArrayAssign) { + { + vectorized::PaddedPODArray array; + array.push_back(1); + array.push_back(2); + + array.assign({1, 2, 3}); + + ASSERT_EQ(array.size(), 3); + ASSERT_EQ(array, vectorized::PaddedPODArray({1, 2, 3})); + } + { + vectorized::PaddedPODArray array; + array.push_back(1); + array.push_back(2); + + array.assign({}); + + ASSERT_TRUE(array.empty()); + } + { + vectorized::PaddedPODArray array; + array.assign({}); + + ASSERT_TRUE(array.empty()); + } +} + +TEST(PODArrayTest, PODNoOverallocation) { + /// Check that PaddedPODArray allocates for smaller number of elements than the power of two due to padding. + /// NOTE: It's Ok to change these numbers if you will modify initial size or padding. + + vectorized::PaddedPODArray chars; + std::vector capacities; + + size_t prev_capacity = 0; + for (size_t i = 0; i < 1000000; ++i) { + chars.emplace_back(); + if (chars.capacity() != prev_capacity) { + prev_capacity = chars.capacity(); + capacities.emplace_back(prev_capacity); + } + } + + EXPECT_EQ(capacities, (std::vector {4064, 8160, 16352, 32736, 65504, 131040, 262112, + 524256, 1048544})); +} + +template +struct ItemWithSize { + char v[size] {}; +}; + +TEST(PODArrayTest, PODInsertElementSizeNotMultipleOfLeftPadding) { + using ItemWith24Size = ItemWithSize<24>; + vectorized::PaddedPODArray arr1_initially_empty; + + size_t items_to_insert_size = 120000; + + for (size_t test = 0; test < items_to_insert_size; ++test) { + arr1_initially_empty.emplace_back(); + } + + EXPECT_EQ(arr1_initially_empty.size(), items_to_insert_size); + + vectorized::PaddedPODArray arr2_initially_nonempty; + + for (size_t test = 0; test < items_to_insert_size; ++test) { + arr2_initially_nonempty.emplace_back(); + } + + EXPECT_EQ(arr1_initially_empty.size(), items_to_insert_size); +} + +} // end namespace doris