 #include "tensorrt_llm/batch_manager/kvCacheEventManager.h"
 #include "tensorrt_llm/batch_manager/kvCacheManager.h"
 #include "tensorrt_llm/executor/executor.h"
+#include "tensorrt_llm/executor/serialization.h"
+#include "tensorrt_llm/runtime/utils/mpiUtils.h"

 namespace tle = tensorrt_llm::executor;

 namespace tensorrt_llm::batch_manager::kv_cache_manager
 {

 KVCacheEventManager::KVCacheEventManager(size_t maxKVEventEntries, std::optional<SizeType32> attentionDpRank,
-    std::optional<SizeType32> attentionDpSize, std::optional<SizeType32> ppSize)
+    std::optional<SizeType32> attentionDpSize, SizeType32 attentionDpEventsGatherPeriodMs)
     : mRun{true}
     , mMaxSize{maxKVEventEntries}
     , mEventId{0}
     , mAttentionDpRank{attentionDpRank}
     , mAttentionDpSize{attentionDpSize}
+    , mAttentionDpEventsGatherPeriodMs(attentionDpEventsGatherPeriodMs)
 {
+
     TLLM_CHECK(mMaxSize > 0);
     if (mAttentionDpRank)
     {
         TLLM_CHECK_WITH_INFO(
             mAttentionDpSize.has_value(), "If attention DP rank is set, the attention DP size must also be set");
-        TLLM_CHECK(ppSize.has_value());
-        TLLM_CHECK_WITH_INFO(ppSize.value() == 1, "Events with attention DP are not supported with PP > 1");
         TLLM_CHECK_WITH_INFO(mAttentionDpRank.value() < mAttentionDpSize.value(),
             "Attention DP rank must be less than attention DP size");
+        if (mAttentionDpRank.value() == 0)
+        {
+            // Rank 0 will gather events from all other ranks
+            // Need to increase size
+            mMaxSize *= mAttentionDpSize.value();
+        }
     }
     else
     {
         TLLM_CHECK_WITH_INFO(
-            !mAttentionDpSize.has_value(), "If attention DP size is set, the attention DP rank must also be set");
+            !mAttentionDpSize.has_value(), "If attention DP rank is not set, the attention DP size must not be set");
     }
-    // mWorkerThread = std::thread(std::bind(&KVCacheEventManager::worker, this));
     mWorkerThread = std::thread([this]() { this->worker(); });
-    mExchangeAttentionDpThread = std::thread([this]() { this->exchangeAttentionDpEvents(); });
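+    // When attention DP is enabled, a second thread periodically exchanges events across ranks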
+    if (mAttentionDpRank)
+    {
+        mExchangeAttentionDpThread = std::thread([this]() { this->exchangeAttentionDpThread(); });
+    }
 };

 KVCacheEventManager::~KVCacheEventManager()
@@ -58,7 +68,10 @@ KVCacheEventManager::~KVCacheEventManager()
     mPendingEmptyCV.notify_all();
     mEmptyCV.notify_all();
     mWorkerThread.join();
-    mAttentionDpExchangeThread.join();
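+    // The exchange thread only exists when attention DP is enabled, so join it conditionally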
+    if (mAttentionDpRank)
+    {
+        mExchangeAttentionDpThread.join();
+    }
 }

 void KVCacheEventManager::enqueueCreatedEvent(
@@ -84,7 +97,7 @@ void KVCacheEventManager::enqueueStoredEvent(std::vector<BlockPtr> const& blocks
     for (auto const& block : blocks)
     {
         data.blocks.emplace_back(block->getHash(), block->getUniqueTokens(), block->getBlockKey().loraTaskId,
-            block->isPrimary() ? kPrimaryLevel : kSecondaryLevel, block->getPriority(), mAttentionDpRank);
+            block->isPrimary() ? kPrimaryLevel : kSecondaryLevel, block->getPriority());
     }

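+    // The attention DP rank is attached to the event itself, not to each block entry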
     enqueueEvent({mEventId++, data, windowSize, mAttentionDpRank});
@@ -100,7 +113,7 @@ void KVCacheEventManager::enqueueRemovedEvent(BlockPtr const& block, SizeType32
     }
     else
     {
-        enqueueEvent({mEventId++, tle::KVCacheRemovedData{{block->getHash()}}, windowSize});
+        enqueueEvent({mEventId++, tle::KVCacheRemovedData{{block->getHash()}}, windowSize, mAttentionDpRank});
     }
 }

@@ -136,28 +149,27 @@ void KVCacheEventManager::flush()
     auto eventQueue = std::exchange(mEventQueue, {});
     std::unique_lock<std::mutex> lck(mPendingEventsMutex);
     mPendingEvents.push_back(std::move(eventQueue));
-    // If we have events, we need to notify the worker thread to process them
     mPendingEmptyCV.notify_one();
 }

 void KVCacheEventManager::exchangeAttentionDpThread()
 {
-    int32_t pollPeriodMs = 5;
     while (true)
     {
-        // If we are not rank 0, send events asynchronously
+        TLLM_CHECK(mAttentionDpRank);
+        // If we are not rank 0, send events to rank 0
         if (mAttentionDpRank.value() != 0)
         {
             std::vector<char> serializedEvents;
             {
                 std::unique_lock<std::mutex> lck(mEventsMutex);
-                serializedEvents = Serialization::serialize(mEvents);
+                serializedEvents = executor::Serialization::serialize(mEvents);
                 mEvents.clear();
             }
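+            // Send the payload size first so rank 0 can size its receive buffer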
             uint64_t vecSize = serializedEvents.size();
-            COMM_SESSION.send(&vecSize, 1, MpiType::kUINT64, 0, MpiTag::kKVCacheEventSize);
+            COMM_SESSION.send(&vecSize, 1, mpi::MpiType::kUINT64, 0, mpi::MpiTag::kKvCacheEventSize);
             COMM_SESSION.send(
-                serializedEvents.data(), serializedEvents.size(), MpiType::kCHAR, 0, MpiTag::kKVCacheEvent);
+                serializedEvents.data(), serializedEvents.size(), mpi::MpiType::kCHAR, 0, mpi::MpiTag::kKvCacheEvent);
         }
         else
         {
@@ -167,18 +179,18 @@ void KVCacheEventManager::exchangeAttentionDpThread()
             while (numRecvs < mAttentionDpSize.value() - 1)
             {
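+                // Probe for a pending message from any rank without blocking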
                 MPI_Status probeStatus;
-                if (COMM_SESSION.iprobe(MPI_ANY_SOURCE, MpiTag::kKVCacheEvent, &status))
+                if (COMM_SESSION.iprobe(MPI_ANY_SOURCE, mpi::MpiTag::kKvCacheEvent, &probeStatus))
                 {
-                    uint64_t vecSize;
+                    uint64_t vecSize{0};
                     COMM_SESSION.recv(
-                        &vecSize, 1, mpi::MpiType::kUINT64, probeStatus.MPI_SOURCE, mpi::MpiTag::kKVCacheEventSize);
+                        &vecSize, 1, mpi::MpiType::kUINT64, probeStatus.MPI_SOURCE, mpi::MpiTag::kKvCacheEventSize);

                     std::vector<char> serializedEvents(vecSize);
-                    COMM_SESSION.recv(&serializedEvents.data(), vecSize, mpi::MpiType::kCHAR, probeStatus.MPI_SOURCE,
-                        mpi::MpiTag::kKVCacheEvent);
+                    COMM_SESSION.recv(serializedEvents.data(), vecSize, mpi::MpiType::kCHAR, probeStatus.MPI_SOURCE,
+                        mpi::MpiTag::kKvCacheEvent);

                     // Deserialize the events and add them to the local queue
-                    auto rankEvents = Serialization::deserializeKVCacheEvents(serializedEvents);
+                    auto rankEvents = executor::Serialization::deserializeKVCacheEvents(serializedEvents);
                     {
                         std::unique_lock<std::mutex> lck(mEventsMutex);
                         mEvents.insert(mEvents.end(), rankEvents.begin(), rankEvents.end());
@@ -187,47 +199,47 @@ void KVCacheEventManager::exchangeAttentionDpThread()
                     numRecvs++;
                 }
             }
-            std::this_thread::sleep_for(std::chrono::milliseconds(pollPeriodMs));
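+            // Wait for the configured gather period before polling for more events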
+            std::this_thread::sleep_for(std::chrono::milliseconds(mAttentionDpEventsGatherPeriodMs));
         }
     }
+}

-    void KVCacheEventManager::worker()
-    {
+void KVCacheEventManager::worker()
+{

-        while (true)
+    while (true)
+    {
+        std::deque<tle::KVCacheEvent> events;
         {
-            std::deque<tle::KVCacheEvent> events;
+            std::unique_lock<std::mutex> pendingLock(mPendingEventsMutex);
+            mPendingEmptyCV.wait(pendingLock, [this] { return !mPendingEvents.empty() || !mRun; });
+            if (!mRun)
             {
-                std::unique_lock<std::mutex> pendingLock(mPendingEventsMutex);
-                mPendingEmptyCV.wait(pendingLock, [this] { return !mPendingEvents.empty() || !mRun; });
-                if (!mRun)
-                {
-                    return;
-                }
-                events = mPendingEvents.front();
-                mPendingEvents.pop_front();
+                return;
             }
+            events = mPendingEvents.front();
+            mPendingEvents.pop_front();
+        }

-            std::unique_lock<std::mutex> lck(mEventsMutex);
+        std::unique_lock<std::mutex> lck(mEventsMutex);

-            SizeType32 elementsToRemove = mEvents.size() + events.size() - mMaxSize;
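+        // Number of events that exceed the queue capacity; negative means there is still room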
+        SizeType32 elementsToRemove = mEvents.size() + events.size() - mMaxSize;

-            // First, take elements from mEvents since they are the oldest.
-            if (elementsToRemove > 0)
-            {
-                SizeType32 numRemoved = std::min(static_cast<SizeType32>(mEvents.size()), elementsToRemove);
-                mEvents.erase(mEvents.begin(), mEvents.begin() + numRemoved);
-                elementsToRemove -= numRemoved;
-                TLLM_LOG_WARNING(
-                    "The event queue has reached the max size of %d. Events have been discarded.", mMaxSize);
-            }
+        // First, take elements from mEvents since they are the oldest.
+        if (elementsToRemove > 0)
+        {
+            SizeType32 numRemoved = std::min(static_cast<SizeType32>(mEvents.size()), elementsToRemove);
+            mEvents.erase(mEvents.begin(), mEvents.begin() + numRemoved);
+            elementsToRemove -= numRemoved;
+            TLLM_LOG_WARNING("The event queue has reached the max size of %d. Events have been discarded.", mMaxSize);
+        }

-            // If there's still too many events, take from the front of the events queue.
-            mEvents.insert(mEvents.end(), events.begin() + std::max(0, elementsToRemove), events.end());
+        // If there's still too many events, take from the front of the events queue.
+        mEvents.insert(mEvents.end(), events.begin() + std::max(0, elementsToRemove), events.end());

-            // Notify the empty condition variable to wake up any waiting threads
-            mEmptyCV.notify_one();
-        }
+        // Notify the empty condition variable to wake up any waiting threads
+        mEmptyCV.notify_one();
     }
+}

 } // namespace tensorrt_llm::batch_manager::kv_cache_manager