From fe5d3e4ea9968c7301b430ae547ebad0e0c17ea1 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 27 Sep 2025 22:31:48 +0200 Subject: [PATCH 1/6] [ntuple] factor out vector resize logic --- .../ROOT/RField/RFieldSequenceContainer.hxx | 4 ++ tree/ntuple/src/RFieldSequenceContainer.cxx | 42 +++++++++++-------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx b/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx index 78910b219c063..e649aee059fdf 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx @@ -222,6 +222,10 @@ class RVectorField : public RFieldBase { ROOT::Internal::RColumnIndex fNWritten; std::unique_ptr fItemDeleter; + // Ensures that the std::vector pointed to by vec has at least nItems valid elements. + static void ResizeVector(void *vec, std::size_t nItems, std::size_t itemSize, const RFieldBase &itemField, + RDeleter *itemDeleter); + protected: /// Creates a possibly-untyped VectorField. /// If `emulatedFromType` is not nullopt, the field is untyped. If the string is empty, it is a "regular" diff --git a/tree/ntuple/src/RFieldSequenceContainer.cxx b/tree/ntuple/src/RFieldSequenceContainer.cxx index 0f526a0a1b8a0..ee66dd25271c2 100644 --- a/tree/ntuple/src/RFieldSequenceContainer.cxx +++ b/tree/ntuple/src/RFieldSequenceContainer.cxx @@ -583,6 +583,30 @@ std::size_t ROOT::RVectorField::AppendImpl(const void *from) return nbytes + fPrincipalColumn->GetElement()->GetPackedSize(); } +void ROOT::RVectorField::ResizeVector(void *vec, std::size_t nItems, std::size_t itemSize, const RFieldBase &itemField, + RDeleter *itemDeleter) +{ + auto typedValue = static_cast *>(vec); + + // See "semantics of reading non-trivial objects" in RNTuple's Architecture.md + R__ASSERT(itemSize > 0); + const auto oldNItems = typedValue->size() / itemSize; + const bool canRealloc = oldNItems < nItems; + bool allDeallocated = false; + if (itemDeleter) { + allDeallocated = canRealloc; + for (std::size_t i = allDeallocated ? 0 : nItems; i < oldNItems; ++i) { + itemDeleter->operator()(typedValue->data() + (i * itemSize), true /* dtorOnly */); + } + } + typedValue->resize(nItems * itemSize); + if (!(itemField.GetTraits() & kTraitTriviallyConstructible)) { + for (std::size_t i = allDeallocated ? 0 : oldNItems; i < nItems; ++i) { + CallConstructValueOn(itemField, typedValue->data() + (i * itemSize)); + } + } +} + void ROOT::RVectorField::ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) { auto typedValue = static_cast *>(to); @@ -598,23 +622,7 @@ void ROOT::RVectorField::ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to return; } - // See "semantics of reading non-trivial objects" in RNTuple's Architecture.md - R__ASSERT(fItemSize > 0); - const auto oldNItems = typedValue->size() / fItemSize; - const bool canRealloc = oldNItems < nItems; - bool allDeallocated = false; - if (fItemDeleter) { - allDeallocated = canRealloc; - for (std::size_t i = allDeallocated ? 0 : nItems; i < oldNItems; ++i) { - fItemDeleter->operator()(typedValue->data() + (i * fItemSize), true /* dtorOnly */); - } - } - typedValue->resize(nItems * fItemSize); - if (!(fSubfields[0]->GetTraits() & kTraitTriviallyConstructible)) { - for (std::size_t i = allDeallocated ? 0 : oldNItems; i < nItems; ++i) { - CallConstructValueOn(*fSubfields[0], typedValue->data() + (i * fItemSize)); - } - } + ResizeVector(to, nItems, fItemSize, *fSubfields[0], fItemDeleter.get()); for (std::size_t i = 0; i < nItems; ++i) { CallReadOn(*fSubfields[0], collectionStart + i, typedValue->data() + (i * fItemSize)); From 0cd8365efcb1de270be0ec09b123dd9ed03f94eb Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sat, 27 Sep 2025 22:35:17 +0200 Subject: [PATCH 2/6] [ntuple] factor out splitting vector field --- tree/ntuple/src/RFieldSequenceContainer.cxx | 199 ++++++++++---------- 1 file changed, 102 insertions(+), 97 deletions(-) diff --git a/tree/ntuple/src/RFieldSequenceContainer.cxx b/tree/ntuple/src/RFieldSequenceContainer.cxx index ee66dd25271c2..b72ce5226290f 100644 --- a/tree/ntuple/src/RFieldSequenceContainer.cxx +++ b/tree/ntuple/src/RFieldSequenceContainer.cxx @@ -13,6 +13,107 @@ #include #include // hardware_destructive_interference_size +namespace { + +/// Retrieve the addresses of the data members of a generic RVec from a pointer to the beginning of the RVec object. +/// Returns pointers to fBegin, fSize and fCapacity in a std::tuple. +std::tuple GetRVecDataMembers(void *rvecPtr) +{ + unsigned char **beginPtr = reinterpret_cast(rvecPtr); + // int32_t fSize is the second data member (after 1 void*) + std::int32_t *size = reinterpret_cast(beginPtr + 1); + R__ASSERT(*size >= 0); + // int32_t fCapacity is the third data member (1 int32_t after fSize) + std::int32_t *capacity = size + 1; + R__ASSERT(*capacity >= -1); + return {beginPtr, size, capacity}; +} + +std::tuple +GetRVecDataMembers(const void *rvecPtr) +{ + return {GetRVecDataMembers(const_cast(rvecPtr))}; +} + +std::size_t EvalRVecValueSize(std::size_t alignOfT, std::size_t sizeOfT, std::size_t alignOfRVecT) +{ + // the size of an RVec is the size of its 4 data-members + optional padding: + // + // data members: + // - void *fBegin + // - int32_t fSize + // - int32_t fCapacity + // - the char[] inline storage, which is aligned like T + // + // padding might be present: + // - between fCapacity and the char[] buffer aligned like T + // - after the char[] buffer + + constexpr auto dataMemberSz = sizeof(void *) + 2 * sizeof(std::int32_t); + + // mimic the logic of RVecInlineStorageSize, but at runtime + const auto inlineStorageSz = [&] { + constexpr unsigned cacheLineSize = R__HARDWARE_INTERFERENCE_SIZE; + const unsigned elementsPerCacheLine = (cacheLineSize - dataMemberSz) / sizeOfT; + constexpr unsigned maxInlineByteSize = 1024; + const unsigned nElements = + elementsPerCacheLine >= 8 ? elementsPerCacheLine : (sizeOfT * 8 > maxInlineByteSize ? 0 : 8); + return nElements * sizeOfT; + }(); + + // compute padding between first 3 datamembers and inline buffer + // (there should be no padding between the first 3 data members) + auto paddingMiddle = dataMemberSz % alignOfT; + if (paddingMiddle != 0) + paddingMiddle = alignOfT - paddingMiddle; + + // padding at the end of the object + auto paddingEnd = (dataMemberSz + paddingMiddle + inlineStorageSz) % alignOfRVecT; + if (paddingEnd != 0) + paddingEnd = alignOfRVecT - paddingEnd; + + return dataMemberSz + inlineStorageSz + paddingMiddle + paddingEnd; +} + +std::size_t EvalRVecAlignment(std::size_t alignOfSubfield) +{ + // the alignment of an RVec is the largest among the alignments of its data members + // (including the inline buffer which has the same alignment as the RVec::value_type) + return std::max({alignof(void *), alignof(std::int32_t), alignOfSubfield}); +} + +void DestroyRVecWithChecks(std::size_t alignOfT, unsigned char **beginPtr, std::int32_t *capacityPtr) +{ + // figure out if we are in the small state, i.e. begin == &inlineBuffer + // there might be padding between fCapacity and the inline buffer, so we compute it here + constexpr auto dataMemberSz = sizeof(void *) + 2 * sizeof(std::int32_t); + auto paddingMiddle = dataMemberSz % alignOfT; + if (paddingMiddle != 0) + paddingMiddle = alignOfT - paddingMiddle; + const bool isSmall = (*beginPtr == (reinterpret_cast(beginPtr) + dataMemberSz + paddingMiddle)); + + const bool owns = (*capacityPtr != -1); + if (!isSmall && owns) + free(*beginPtr); +} + +std::vector SplitVector(std::shared_ptr valuePtr, ROOT::RFieldBase &itemField) +{ + auto *vec = static_cast *>(valuePtr.get()); + const auto itemSize = itemField.GetValueSize(); + R__ASSERT(itemSize > 0); + R__ASSERT((vec->size() % itemSize) == 0); + const auto nItems = vec->size() / itemSize; + std::vector result; + result.reserve(nItems); + for (unsigned i = 0; i < nItems; ++i) { + result.emplace_back(itemField.BindValue(std::shared_ptr(valuePtr, vec->data() + (i * itemSize)))); + } + return result; +} + +} // anonymous namespace + ROOT::RArrayField::RArrayField(std::string_view fieldName, std::unique_ptr itemField, std::size_t arrayLength) : ROOT::RFieldBase(fieldName, @@ -136,92 +237,6 @@ void ROOT::RArrayField::AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) cons //------------------------------------------------------------------------------ -namespace { - -/// Retrieve the addresses of the data members of a generic RVec from a pointer to the beginning of the RVec object. -/// Returns pointers to fBegin, fSize and fCapacity in a std::tuple. -std::tuple GetRVecDataMembers(void *rvecPtr) -{ - unsigned char **beginPtr = reinterpret_cast(rvecPtr); - // int32_t fSize is the second data member (after 1 void*) - std::int32_t *size = reinterpret_cast(beginPtr + 1); - R__ASSERT(*size >= 0); - // int32_t fCapacity is the third data member (1 int32_t after fSize) - std::int32_t *capacity = size + 1; - R__ASSERT(*capacity >= -1); - return {beginPtr, size, capacity}; -} - -std::tuple -GetRVecDataMembers(const void *rvecPtr) -{ - return {GetRVecDataMembers(const_cast(rvecPtr))}; -} - -std::size_t EvalRVecValueSize(std::size_t alignOfT, std::size_t sizeOfT, std::size_t alignOfRVecT) -{ - // the size of an RVec is the size of its 4 data-members + optional padding: - // - // data members: - // - void *fBegin - // - int32_t fSize - // - int32_t fCapacity - // - the char[] inline storage, which is aligned like T - // - // padding might be present: - // - between fCapacity and the char[] buffer aligned like T - // - after the char[] buffer - - constexpr auto dataMemberSz = sizeof(void *) + 2 * sizeof(std::int32_t); - - // mimic the logic of RVecInlineStorageSize, but at runtime - const auto inlineStorageSz = [&] { - constexpr unsigned cacheLineSize = R__HARDWARE_INTERFERENCE_SIZE; - const unsigned elementsPerCacheLine = (cacheLineSize - dataMemberSz) / sizeOfT; - constexpr unsigned maxInlineByteSize = 1024; - const unsigned nElements = - elementsPerCacheLine >= 8 ? elementsPerCacheLine : (sizeOfT * 8 > maxInlineByteSize ? 0 : 8); - return nElements * sizeOfT; - }(); - - // compute padding between first 3 datamembers and inline buffer - // (there should be no padding between the first 3 data members) - auto paddingMiddle = dataMemberSz % alignOfT; - if (paddingMiddle != 0) - paddingMiddle = alignOfT - paddingMiddle; - - // padding at the end of the object - auto paddingEnd = (dataMemberSz + paddingMiddle + inlineStorageSz) % alignOfRVecT; - if (paddingEnd != 0) - paddingEnd = alignOfRVecT - paddingEnd; - - return dataMemberSz + inlineStorageSz + paddingMiddle + paddingEnd; -} - -std::size_t EvalRVecAlignment(std::size_t alignOfSubfield) -{ - // the alignment of an RVec is the largest among the alignments of its data members - // (including the inline buffer which has the same alignment as the RVec::value_type) - return std::max({alignof(void *), alignof(std::int32_t), alignOfSubfield}); -} - -void DestroyRVecWithChecks(std::size_t alignOfT, unsigned char **beginPtr, std::int32_t *capacityPtr) -{ - // figure out if we are in the small state, i.e. begin == &inlineBuffer - // there might be padding between fCapacity and the inline buffer, so we compute it here - constexpr auto dataMemberSz = sizeof(void *) + 2 * sizeof(std::int32_t); - auto paddingMiddle = dataMemberSz % alignOfT; - if (paddingMiddle != 0) - paddingMiddle = alignOfT - paddingMiddle; - const bool isSmall = (*beginPtr == (reinterpret_cast(beginPtr) + dataMemberSz + paddingMiddle)); - - const bool owns = (*capacityPtr != -1); - if (!isSmall && owns) - free(*beginPtr); -} - -} // anonymous namespace - ROOT::RRVecField::RRVecField(std::string_view fieldName, std::unique_ptr itemField) : ROOT::RFieldBase(fieldName, "ROOT::VecOps::RVec<" + itemField->GetTypeName() + ">", ROOT::ENTupleStructure::kCollection, false /* isSimple */), @@ -678,17 +693,7 @@ std::unique_ptr ROOT::RVectorField::GetDeleter() con std::vector ROOT::RVectorField::SplitValue(const RValue &value) const { - auto valuePtr = value.GetPtr(); - auto *vec = static_cast *>(valuePtr.get()); - R__ASSERT(fItemSize > 0); - R__ASSERT((vec->size() % fItemSize) == 0); - auto nItems = vec->size() / fItemSize; - std::vector result; - result.reserve(nItems); - for (unsigned i = 0; i < nItems; ++i) { - result.emplace_back(fSubfields[0]->BindValue(std::shared_ptr(valuePtr, vec->data() + (i * fItemSize)))); - } - return result; + return SplitVector(value.GetPtr(), *fSubfields[0]); } void ROOT::RVectorField::AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const From aa783f7c89a44fbfaf25e4992b937775f06f39cb Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sun, 28 Sep 2025 22:10:18 +0200 Subject: [PATCH 3/6] [ntuple] add RArrayAsVectorField --- .../ROOT/RField/RFieldSequenceContainer.hxx | 45 ++++++++++ tree/ntuple/inc/ROOT/RFieldVisitor.hxx | 2 + tree/ntuple/src/RFieldSequenceContainer.cxx | 85 +++++++++++++++++++ tree/ntuple/src/RFieldVisitor.cxx | 5 ++ 4 files changed, 137 insertions(+) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx b/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx index e649aee059fdf..28ea7897823dd 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx @@ -200,6 +200,7 @@ public: /// The generic field for a (nested) `std::vector` except for `std::vector` /// The field can be constructed as untyped collection through CreateUntyped(). class RVectorField : public RFieldBase { + friend class RArrayAsVectorField; // to get access to the RVectorDeleter friend std::unique_ptr Internal::CreateEmulatedVectorField(std::string_view fieldName, std::unique_ptr itemField, std::string_view emulatedFromType); @@ -367,6 +368,50 @@ public: void AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const final; }; +/** +\class ROOT::RArrayAsVectorField +\brief A field for fixed-size arrays that are represented as std::vector in memory. +\ingroup NTuple +This class is used only for reading. In particular, it helps for schema evolution of fixed-size arrays into vectors. +*/ +class RArrayAsVectorField final : public RFieldBase { +private: + std::unique_ptr fItemDeleter; /// Sub field deleter or nullptr for simple fields + std::size_t fItemSize; /// The size of a child field's item + std::size_t fArrayLength; /// The length of the arrays in this field + +protected: + std::unique_ptr CloneImpl(std::string_view newName) const final; + + void GenerateColumns() final; + using RFieldBase::GenerateColumns; + + void ConstructValue(void *where) const final { new (where) std::vector(); } + /// Returns an RVectorField::RVectorDeleter + std::unique_ptr GetDeleter() const final; + + void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final; + void ReadInClusterImpl(RNTupleLocalIndex localIndex, void *to) final; + + void ReconcileOnDiskField(const RNTupleDescriptor &desc) final; + +public: + /// The `itemField` argument represents the inner item of the on-disk array, + /// i.e. for an `std::array` it is the `float` + RArrayAsVectorField(std::string_view fieldName, std::unique_ptr itemField, std::size_t arrayLength); + RArrayAsVectorField(const RArrayAsVectorField &other) = delete; + RArrayAsVectorField &operator=(const RArrayAsVectorField &other) = delete; + RArrayAsVectorField(RArrayAsVectorField &&other) = default; + RArrayAsVectorField &operator=(RArrayAsVectorField &&other) = default; + ~RArrayAsVectorField() final = default; + + size_t GetValueSize() const final { return sizeof(std::vector); } + size_t GetAlignment() const final { return std::alignment_of>(); } + + std::vector SplitValue(const RFieldBase::RValue &value) const final; + void AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const final; +}; + } // namespace ROOT #endif diff --git a/tree/ntuple/inc/ROOT/RFieldVisitor.hxx b/tree/ntuple/inc/ROOT/RFieldVisitor.hxx index b95c718d727ad..24b22f538bed3 100644 --- a/tree/ntuple/inc/ROOT/RFieldVisitor.hxx +++ b/tree/ntuple/inc/ROOT/RFieldVisitor.hxx @@ -61,6 +61,7 @@ public: virtual void VisitFieldZero(const ROOT::RFieldZero &field) { VisitField(field); } virtual void VisitArrayField(const ROOT::RArrayField &field) { VisitField(field); } virtual void VisitArrayAsRVecField(const ROOT::RArrayAsRVecField &field) { VisitField(field); } + virtual void VisitArrayAsVectorField(const ROOT::RArrayAsVectorField &field) { VisitField(field); } virtual void VisitAtomicField(const ROOT::RAtomicField &field) { VisitField(field); } virtual void VisitBitsetField(const ROOT::RBitsetField &field) { VisitField(field); } virtual void VisitBoolField(const ROOT::RField &field) { VisitField(field); } @@ -235,6 +236,7 @@ public: void VisitCardinalityField(const ROOT::RCardinalityField &field) final; void VisitArrayField(const ROOT::RArrayField &field) final; void VisitArrayAsRVecField(const ROOT::RArrayAsRVecField &field) final; + void VisitArrayAsVectorField(const ROOT::RArrayAsVectorField &field) final; void VisitClassField(const ROOT::RClassField &field) final; void VisitTObjectField(const ROOT::RField &field) final; void VisitStreamerField(const ROOT::RStreamerField &field) final; diff --git a/tree/ntuple/src/RFieldSequenceContainer.cxx b/tree/ntuple/src/RFieldSequenceContainer.cxx index b72ce5226290f..e79663246bda2 100644 --- a/tree/ntuple/src/RFieldSequenceContainer.cxx +++ b/tree/ntuple/src/RFieldSequenceContainer.cxx @@ -884,3 +884,88 @@ void ROOT::RArrayAsRVecField::AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor { visitor.VisitArrayAsRVecField(*this); } + +//------------------------------------------------------------------------------ + +ROOT::RArrayAsVectorField::RArrayAsVectorField(std::string_view fieldName, std::unique_ptr itemField, + std::size_t arrayLength) + : ROOT::RFieldBase(fieldName, "std::vector<" + itemField->GetTypeName() + ">", ROOT::ENTupleStructure::kCollection, + false /* isSimple */), + fItemSize(itemField->GetValueSize()), + fArrayLength(arrayLength) +{ + Attach(std::move(itemField)); + if (!(fSubfields[0]->GetTraits() & kTraitTriviallyDestructible)) + fItemDeleter = GetDeleterOf(*fSubfields[0]); +} + +std::unique_ptr ROOT::RArrayAsVectorField::CloneImpl(std::string_view newName) const +{ + auto newItemField = fSubfields[0]->Clone(fSubfields[0]->GetFieldName()); + return std::make_unique(newName, std::move(newItemField), fArrayLength); +} + +void ROOT::RArrayAsVectorField::GenerateColumns() +{ + throw RException(R__FAIL("RArrayAsVectorField fields must only be used for reading")); +} + +std::unique_ptr ROOT::RArrayAsVectorField::GetDeleter() const +{ + if (fItemDeleter) + return std::make_unique(fItemSize, GetDeleterOf(*fSubfields[0])); + return std::make_unique(); +} + +void ROOT::RArrayAsVectorField::ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) +{ + auto typedValue = static_cast *>(to); + + if (fSubfields[0]->IsSimple()) { + typedValue->resize(fArrayLength * fItemSize); + GetPrincipalColumnOf(*fSubfields[0])->ReadV(globalIndex * fArrayLength, fArrayLength, typedValue->data()); + return; + } + + RVectorField::ResizeVector(to, fArrayLength, fItemSize, *fSubfields[0], fItemDeleter.get()); + + for (std::size_t i = 0; i < fArrayLength; ++i) { + CallReadOn(*fSubfields[0], globalIndex * fArrayLength + i, typedValue->data() + (i * fItemSize)); + } +} + +void ROOT::RArrayAsVectorField::ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) +{ + auto typedValue = static_cast *>(to); + + if (fSubfields[0]->IsSimple()) { + typedValue->resize(fArrayLength * fItemSize); + GetPrincipalColumnOf(*fSubfields[0])->ReadV(localIndex * fArrayLength, fArrayLength, typedValue->data()); + return; + } + + RVectorField::ResizeVector(to, fArrayLength, fItemSize, *fSubfields[0], fItemDeleter.get()); + + for (std::size_t i = 0; i < fArrayLength; ++i) { + CallReadOn(*fSubfields[0], localIndex * fArrayLength + i, typedValue->data() + (i * fItemSize)); + } +} + +void ROOT::RArrayAsVectorField::ReconcileOnDiskField(const RNTupleDescriptor &desc) +{ + const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId()); + EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName | kDiffTypeVersion | kDiffStructure | kDiffNRepetitions); + if (fieldDesc.GetTypeName().rfind("std::array<", 0) != 0) { + throw RException(R__FAIL("RArrayAsVectorField " + GetQualifiedFieldName() + " expects an on-disk array field")); + } +} + +std::vector ROOT::RArrayAsVectorField::SplitValue(const ROOT::RFieldBase::RValue &value) const +{ + return SplitVector(value.GetPtr(), *fSubfields[0]); +} + +void ROOT::RArrayAsVectorField::AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const +{ + visitor.VisitArrayAsVectorField(*this); +} diff --git a/tree/ntuple/src/RFieldVisitor.cxx b/tree/ntuple/src/RFieldVisitor.cxx index 73da2c27d2719..a5f5cd1f90abd 100644 --- a/tree/ntuple/src/RFieldVisitor.cxx +++ b/tree/ntuple/src/RFieldVisitor.cxx @@ -325,6 +325,11 @@ void ROOT::Internal::RPrintValueVisitor::VisitArrayAsRVecField(const ROOT::RArra PrintCollection(field); } +void ROOT::Internal::RPrintValueVisitor::VisitArrayAsVectorField(const ROOT::RArrayAsVectorField &field) +{ + PrintCollection(field); +} + void ROOT::Internal::RPrintValueVisitor::VisitStreamerField(const ROOT::RStreamerField &field) { PrintIndent(); From 05309aa96ae07a83250e615a3a035b20e101ac72 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Wed, 1 Oct 2025 21:54:01 +0200 Subject: [PATCH 4/6] [ntuple] evolve fixed-size array to RVectorField Substitute the RVectorField by an RArrayAsVectorField if a fixed-size array is found on disk. --- .../inc/ROOT/RField/RFieldSequenceContainer.hxx | 1 + tree/ntuple/src/RFieldSequenceContainer.cxx | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx b/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx index 28ea7897823dd..6caf3629ac8c4 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx @@ -246,6 +246,7 @@ protected: std::size_t AppendImpl(const void *from) final; void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final; + std::unique_ptr BeforeConnectPageSource(ROOT::Internal::RPageSource &pageSource) final; void ReconcileOnDiskField(const RNTupleDescriptor &desc) final; void CommitClusterImpl() final { fNWritten = 0; } diff --git a/tree/ntuple/src/RFieldSequenceContainer.cxx b/tree/ntuple/src/RFieldSequenceContainer.cxx index e79663246bda2..10912bd01e58c 100644 --- a/tree/ntuple/src/RFieldSequenceContainer.cxx +++ b/tree/ntuple/src/RFieldSequenceContainer.cxx @@ -664,6 +664,22 @@ void ROOT::RVectorField::GenerateColumns(const ROOT::RNTupleDescriptor &desc) GenerateColumnsImpl(desc); } +std::unique_ptr ROOT::RVectorField::BeforeConnectPageSource(Internal::RPageSource &pageSource) +{ + if (GetOnDiskId() == kInvalidDescriptorId) + return nullptr; + + const auto descGuard = pageSource.GetSharedDescriptorGuard(); + const auto &fieldDesc = descGuard->GetFieldDescriptor(GetOnDiskId()); + if (fieldDesc.GetTypeName().rfind("std::array<", 0) == 0) { + auto substitute = std::make_unique( + GetFieldName(), fSubfields[0]->Clone(fSubfields[0]->GetFieldName()), fieldDesc.GetNRepetitions()); + substitute->SetOnDiskId(GetOnDiskId()); + return substitute; + } + return nullptr; +} + void ROOT::RVectorField::ReconcileOnDiskField(const RNTupleDescriptor &desc) { EnsureMatchingOnDiskField(desc, kDiffTypeName).ThrowOnError(); From 9e2a3784f99f28e020bff49f465f23240439927f Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Sun, 28 Sep 2025 22:39:19 +0200 Subject: [PATCH 5/6] [ntuple] evolve fixed-size array to std::vector Direct support of fixed-size on-disk arrays in the std::vector specialization. This is different from the general RVectorField case because the bool vector is much simpler to begin with and it is also already inherently slow. --- .../ROOT/RField/RFieldSequenceContainer.hxx | 4 + tree/ntuple/src/RFieldSequenceContainer.cxx | 83 +++++++++++++++---- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx b/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx index 6caf3629ac8c4..e4b152f0ccb49 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx @@ -281,6 +281,9 @@ template <> class RField> final : public RFieldBase { private: ROOT::Internal::RColumnIndex fNWritten{0}; + /// If schema-evolved from an std::array, fOnDiskNRepetition is > 0 and there will be no + /// principal column. + std::size_t fOnDiskNRepetitions = 0; protected: std::unique_ptr CloneImpl(std::string_view newName) const final @@ -297,6 +300,7 @@ protected: std::size_t AppendImpl(const void *from) final; void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final; + void ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) final; void ReconcileOnDiskField(const RNTupleDescriptor &desc) final; diff --git a/tree/ntuple/src/RFieldSequenceContainer.cxx b/tree/ntuple/src/RFieldSequenceContainer.cxx index 10912bd01e58c..d780ec31ade08 100644 --- a/tree/ntuple/src/RFieldSequenceContainer.cxx +++ b/tree/ntuple/src/RFieldSequenceContainer.cxx @@ -742,15 +742,47 @@ void ROOT::RField>::ReadGlobalImpl(ROOT::NTupleSize_t globalIn { auto typedValue = static_cast *>(to); - ROOT::NTupleSize_t nItems; - RNTupleLocalIndex collectionStart; - fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &nItems); + if (fOnDiskNRepetitions == 0) { + ROOT::NTupleSize_t nItems; + RNTupleLocalIndex collectionStart; + fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &nItems); + typedValue->resize(nItems); + for (std::size_t i = 0; i < nItems; ++i) { + bool bval; + CallReadOn(*fSubfields[0], collectionStart + i, &bval); + (*typedValue)[i] = bval; + } + } else { + typedValue->resize(fOnDiskNRepetitions); + for (std::size_t i = 0; i < fOnDiskNRepetitions; ++i) { + bool bval; + CallReadOn(*fSubfields[0], globalIndex * fOnDiskNRepetitions + i, &bval); + (*typedValue)[i] = bval; + } + } +} - typedValue->resize(nItems); - for (unsigned i = 0; i < nItems; ++i) { - bool bval; - CallReadOn(*fSubfields[0], collectionStart + i, &bval); - (*typedValue)[i] = bval; +void ROOT::RField>::ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) +{ + auto typedValue = static_cast *>(to); + + if (fOnDiskNRepetitions == 0) { + ROOT::NTupleSize_t nItems; + RNTupleLocalIndex collectionStart; + fPrincipalColumn->GetCollectionInfo(localIndex, &collectionStart, &nItems); + typedValue->resize(nItems); + for (std::size_t i = 0; i < nItems; ++i) { + bool bval; + CallReadOn(*fSubfields[0], collectionStart + i, &bval); + (*typedValue)[i] = bval; + } + } else { + typedValue->resize(fOnDiskNRepetitions); + for (std::size_t i = 0; i < fOnDiskNRepetitions; ++i) { + bool bval; + CallReadOn(*fSubfields[0], localIndex * fOnDiskNRepetitions + i, &bval); + (*typedValue)[i] = bval; + } } } @@ -760,23 +792,41 @@ const ROOT::RFieldBase::RColumnRepresentations &ROOT::RField>: {ENTupleColumnType::kIndex64}, {ENTupleColumnType::kSplitIndex32}, {ENTupleColumnType::kIndex32}}, - {}); + {{}}); return representations; } void ROOT::RField>::GenerateColumns() { + R__ASSERT(fOnDiskNRepetitions == 0); // fOnDiskNRepetitions must only be used for reading GenerateColumnsImpl(); } void ROOT::RField>::GenerateColumns(const ROOT::RNTupleDescriptor &desc) { - GenerateColumnsImpl(desc); + if (fOnDiskNRepetitions == 0) + GenerateColumnsImpl(desc); } void ROOT::RField>::ReconcileOnDiskField(const RNTupleDescriptor &desc) { - EnsureMatchingOnDiskField(desc, kDiffTypeName).ThrowOnError(); + const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId()); + + if (fieldDesc.GetTypeName().rfind("std::array<", 0) == 0) { + EnsureMatchingOnDiskField(desc, kDiffTypeName | kDiffStructure | kDiffNRepetitions).ThrowOnError(); + + if (fieldDesc.GetNRepetitions() == 0) { + throw RException(R__FAIL("fixed-size array --> std::vector: expected repetition count > 0\n" + + Internal::GetTypeTraceReport(*this, desc))); + } + if (fieldDesc.GetStructure() != ENTupleStructure::kPlain) { + throw RException(R__FAIL("fixed-size array --> std::vector: expected plain on-disk field\n" + + Internal::GetTypeTraceReport(*this, desc))); + } + fOnDiskNRepetitions = fieldDesc.GetNRepetitions(); + } else { + EnsureMatchingOnDiskField(desc, kDiffTypeName).ThrowOnError(); + } } std::vector ROOT::RField>::SplitValue(const RValue &value) const @@ -786,10 +836,11 @@ std::vector ROOT::RField>::SplitValu std::vector result; result.reserve(count); for (unsigned i = 0; i < count; ++i) { - if (typedValue[i]) + if (typedValue[i]) { result.emplace_back(fSubfields[0]->BindValue(std::shared_ptr(new bool(true)))); - else + } else { result.emplace_back(fSubfields[0]->BindValue(std::shared_ptr(new bool(false)))); + } } return result; } @@ -969,10 +1020,12 @@ void ROOT::RArrayAsVectorField::ReadInClusterImpl(ROOT::RNTupleLocalIndex localI void ROOT::RArrayAsVectorField::ReconcileOnDiskField(const RNTupleDescriptor &desc) { + EnsureMatchingOnDiskField(desc, kDiffTypeName | kDiffTypeVersion | kDiffStructure | kDiffNRepetitions); + const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId()); - EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName | kDiffTypeVersion | kDiffStructure | kDiffNRepetitions); if (fieldDesc.GetTypeName().rfind("std::array<", 0) != 0) { - throw RException(R__FAIL("RArrayAsVectorField " + GetQualifiedFieldName() + " expects an on-disk array field")); + throw RException(R__FAIL("RArrayAsVectorField " + GetQualifiedFieldName() + " expects an on-disk array field\n" + + Internal::GetTypeTraceReport(*this, desc))); } } From 5994ae14881e7e7ce38314d2a3b95cccb2142411 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Wed, 1 Oct 2025 21:54:19 +0200 Subject: [PATCH 6/6] [ntuple] test fixed-size array to vector evolution --- tree/ntuple/test/ntuple_evolution_type.cxx | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tree/ntuple/test/ntuple_evolution_type.cxx b/tree/ntuple/test/ntuple_evolution_type.cxx index 75ef884464ccb..f939370bc7636 100644 --- a/tree/ntuple/test/ntuple_evolution_type.cxx +++ b/tree/ntuple/test/ntuple_evolution_type.cxx @@ -256,6 +256,33 @@ TEST(RNTupleEvolution, ArrayAsRVec) EXPECT_EQ(2, a(0)[1]); } +TEST(RNTupleEvolution, ArrayAsVector) +{ + FileRaii fileGuard("test_ntuple_evolution_array_as_vector.root"); + { + auto model = ROOT::RNTupleModel::Create(); + auto a = model->MakeField>("a"); + auto writer = ROOT::RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + + *a = {0, 1}; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + + auto aAsShort = reader->GetView>("a"); + const auto &f = aAsShort.GetField(); // necessary to silence clang warning + EXPECT_EQ(typeid(f), typeid(ROOT::RArrayAsVectorField)); + EXPECT_EQ(2u, aAsShort(0).size()); + EXPECT_EQ(0, aAsShort(0)[0]); + EXPECT_EQ(1, aAsShort(0)[1]); + + auto aAsBool = reader->GetView>("a"); + EXPECT_EQ(2u, aAsBool(0).size()); + EXPECT_FALSE(aAsBool(0)[0]); + EXPECT_TRUE(aAsBool(0)[1]); +} + TEST(RNTupleEvolution, NullableToVector) { FileRaii fileGuard("test_ntuple_evolution_nullable_to_vector.root");