NVIDIA
diff --git a/‎.coderabbit.yaml‎
Lines changed: 0 additions & 1 deletion b/‎.coderabbit.yaml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎.github/pull_request_template.md‎
Lines changed: 12 additions & 6 deletions b/‎.github/pull_request_template.md‎
Lines changed: 12 additions & 6 deletions
diff --git a/‎.github/workflows/pr-check.yml‎
Lines changed: 55 additions & 0 deletions b/‎.github/workflows/pr-check.yml‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎3rdparty/xgrammar‎ b/‎3rdparty/xgrammar‎
diff --git a/‎cpp/include/tensorrt_llm/runtime/gptDecoder.h‎
Lines changed: 0 additions & 1 deletion b/‎cpp/include/tensorrt_llm/runtime/gptDecoder.h‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎cpp/tensorrt_llm/batch_manager/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎cpp/tensorrt_llm/batch_manager/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cpp/tensorrt_llm/batch_manager/cacheFormatter.cpp‎
Lines changed: 32 additions & 4 deletions b/‎cpp/tensorrt_llm/batch_manager/cacheFormatter.cpp‎
Lines changed: 32 additions & 4 deletions
diff --git a/‎cpp/tensorrt_llm/batch_manager/cacheFormatter.h‎
Lines changed: 9 additions & 1 deletion b/‎cpp/tensorrt_llm/batch_manager/cacheFormatter.h‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎cpp/tensorrt_llm/batch_manager/dataTransceiver.h‎
Lines changed: 22 additions & 7 deletions b/‎cpp/tensorrt_llm/batch_manager/dataTransceiver.h‎
Lines changed: 22 additions & 7 deletions
diff --git a/‎cpp/tensorrt_llm/batch_manager/dataTransceiverImpl.cpp‎
Lines changed: 2 additions & 0 deletions b/‎cpp/tensorrt_llm/batch_manager/dataTransceiverImpl.cpp‎
Lines changed: 2 additions & 0 deletions
@@ -28,7 +28,6 @@ reviews:
   related_prs: true
   suggested_labels: true
   suggested_reviewers: true
-  auto_assign_reviewers: true
   poem: false
   auto_review:
     drafts: true
 
@@ -3,15 +3,21 @@
 <!--
 Please write the PR title by following this template:
 
-[JIRA ticket/NVBugs ID/GitHub issue][fix/feat/doc/infra/...] \<summary of this PR\>
+**[JIRA ticket/NVBugs ID/GitHub issue/None][type] Summary**
 
-For example, assume I have a PR to support a new feature about cache manager for JIRA ticket TRTLLM-1000, it would be like:
+Valid ticket formats:
+  - JIRA ticket: [TRTLLM-1234] or [FOOBAR-123] for other FOOBAR project
+  - NVBugs ID: [https://nvbugs/1234567]
+  - GitHub issue: [#1234]
+  - No ticket: [None]
 
-[TRTLLM-1000][feat] Support a new feature about cache manager
+Valid types (lowercase): [fix], [feat], [doc], [infra], [chore], etc.
 
-Or I have a PR to fix a Llama3 accuracy issue:
-
-[https://nvbugs/1234567][fix] Fix Llama3 accuracy issue
+Examples:
+  - [TRTLLM-1234][feat] Add new feature
+  - [https://nvbugs/1234567][fix] Fix some bugs
+  - [#1234][doc] Update documentation
+  - [None][chore] Minor clean-up
 -->
 
 ## Description
 
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PR Checks
+
+on:
+  pull_request:
+    types: [opened, edited, synchronize, reopened]
+
+jobs:
+  check-pr-title:
+    name: Check PR Title Format
+    runs-on: ubuntu-latest
+    steps:
+      - name: Validate PR Title Format
+        id: check-pr-title
+        uses: agenthunt/conventional-commit-checker-action@v2.0.0
+        continue-on-error: true
+        with:
+          pr-title-regex: "^(\\[(None|[A-Z0-9]+-[0-9]+|#[0-9]+|https:\\/\\/nvbugs\\/[0-9]+)\\])(\\[[a-z0-9]+\\]) (([^ ].*)?[^ ])$"
+          pr-body-regex: ""
+
+      - name: PR Title Format Guide
+        if: steps.check-pr-title.outcome == 'failure'
+        run: |
+          echo "::error::PR title format check failed."
+          echo "Expected PR title format:"
+          echo "  [JIRA ticket/NVBugs ID/GitHub issue/None][type] Summary"
+          echo ""
+          echo "Valid ticket formats:"
+          echo "  - JIRA ticket: [TRTLLM-1234] or [FOOBAR-123] for other FOOBAR project"
+          echo "  - NVBugs ID: [https://nvbugs/1234567]"
+          echo "  - GitHub issue: [#1234]"
+          echo "  - No ticket: [None]"
+          echo ""
+          echo "Valid types (lowercase): [fix], [feat], [doc], [infra], [chore], etc."
+          echo ""
+          echo "Examples:"
+          echo "  - [TRTLLM-1234][feat] Add new feature"
+          echo "  - [https://nvbugs/1234567][fix] Fix some bugs"
+          echo "  - [#1234][doc] Update documentation"
+          echo "  - [None][chore] Minor clean-up"
+          exit 1
@@ -20,7 +20,6 @@
 #include "tensorrt_llm/runtime/bufferManager.h"
 #include "tensorrt_llm/runtime/decodingInput.h"
 #include "tensorrt_llm/runtime/decodingOutput.h"
-#include "tensorrt_llm/runtime/request.h"
 #include "tensorrt_llm/runtime/samplingConfig.h"
 
 #include <NvInferRuntime.h>
 
@@ -59,7 +59,7 @@ set(SRCS
 
 file(GLOB_RECURSE XGRAMMAR_SRCS "${3RDPARTY_DIR}/xgrammar/cpp/*.cc")
 list(FILTER XGRAMMAR_SRCS EXCLUDE REGEX
-     "${3RDPARTY_DIR}/xgrammar/cpp/pybind/.*\\.cc")
+     "${3RDPARTY_DIR}/xgrammar/cpp/nanobind/.*\\.cc")
 list(APPEND SRCS ${XGRAMMAR_SRCS})
 
 if(NOT WIN32)
 
@@ -166,6 +166,9 @@ void CacheFormatter::format(TransferSession& session)
     auto const numPools = blockManager.getNumPools();
     // TODO(oargov): are we sure the other side has the same number of pools? this might not hold for pp_size>1...
 
+    auto lastTokenTime = llmRequest.getPerfMetrics().timingMetrics.lastTokenTime;
+    bool recordDelay = lastTokenTime != std::chrono::steady_clock::time_point();
+
     bool layerWise = common::getEnvDisaggLayerwise() && numPools == 1;
     if (layerWise)
     {
@@ -350,9 +353,14 @@ void CacheFormatter::format(TransferSession& session)
             }
 
             auto endTime = std::chrono::steady_clock::now();
+            double delay = 0.0;
+            if (recordDelay)
+            {
+                delay = std::chrono::duration<double, std::milli>(startTime - lastTokenTime).count();
+            }
             double cacheTransferTime
                 = std::max(0.0, std::chrono::duration<double, std::milli>(endTime - startTime).count());
-            kvCacheMeasureHelper.appendKVCacheTransfer(llmRequest.mRequestId, cacheTransferTime, size);
+            kvCacheMeasureHelper.appendKVCacheTransfer(llmRequest.mRequestId, delay, cacheTransferTime, size);
         };
 
         if (connections.size() > 1)
@@ -408,16 +416,19 @@ void CacheFormatter::unformat(TransferSession& session)
 {
     NVTX3_SCOPED_RANGE(CacheFormatter_unformat);
     auto const& llmRequest = session.getLlmRequest();
+    auto const ctxReqId = llmRequest.getContextPhaseParams().value().getReqId();
     TLLM_LOG_DEBUG(mpi::MpiComm::world().getRank(),
-        "Start receiving KV cache for request ID: %ld, context request ID: %ld.", llmRequest.mRequestId,
-        llmRequest.getContextPhaseParams().value().getReqId());
+        "Start receiving KV cache for request ID: %ld, context request ID: %ld.", llmRequest.mRequestId, ctxReqId);
     auto const& connections = session.getConnections();
     auto const& selfConfig = session.getSelfState().getCacheState().value();
     auto const& destConfig = session.getOtherState().getCacheState().value();
     auto const selfIdx = session.getSelfState().getCommState().value().getSelfIdx();
     auto& bufferManager = session.getBufferManager();
     auto blockRange = getBlockRangeForReceiving(mCacheManager, llmRequest);
 
+    auto arrivalTime = llmRequest.getPerfMetrics().timingMetrics.arrivalTime;
+    bool recordDelay = arrivalTime != std::chrono::steady_clock::time_point();
+
     auto pickUpConnections = pickRecvConnections(connections.size(), selfConfig, selfIdx, destConfig);
 
     TLLM_LOG_DEBUG("pickUpConnections size: %d connections size: %d", pickUpConnections.size(), connections.size());
@@ -546,7 +557,7 @@ void CacheFormatter::unformat(TransferSession& session)
                 }
                 TLLM_LOG_DEBUG(mpi::MpiComm::world().getRank(),
                     "End receiving KV cache for request ID: %ld, context request ID: %ld.", llmRequest.mRequestId,
-                    llmRequest.getContextPhaseParams().value().getReqId());
+                    ctxReqId);
                 return;
             }
             // legacyPath: context executor rank only sends data to one gen executor rank. It sends multiple cache
@@ -634,6 +645,8 @@ void CacheFormatter::unformat(TransferSession& session)
                 TLLM_CUDA_CHECK(cudaSetDevice(deviceId));
                 TLLM_CHECK(pickUpConnections.size() > processIdx);
                 TLLM_CHECK(recvSplitCaches.size() > processIdx);
+                auto startTime = std::chrono::steady_clock::now();
+                size_t size = 0;
                 if (legacyPath)
                 {
                     size_t idx = processIdx * blockNum;
@@ -645,6 +658,7 @@ void CacheFormatter::unformat(TransferSession& session)
                         size_t recvBufferIdx = blockIdx * pickUpConnections.size() + commIdx;
                         llmRequest.updateKvCacheSize((*recvSplitCaches[recvBufferIdx]).getSizeInBytes());
                         auto& buffer = recvSplitCaches.at(recvBufferIdx);
+                        size += buffer->getSizeInBytes();
                         session.recv(pickUpConnections[processIdx], buffer->data(), buffer->getSizeInBytes());
                         idx++;
                     }
@@ -655,6 +669,7 @@ void CacheFormatter::unformat(TransferSession& session)
                     {
                         llmRequest.updateKvCacheSize((*recvSplitCaches.at(processIdx)).getSizeInBytes());
                         auto& buffer = recvSplitCaches[processIdx];
+                        size = buffer->getSizeInBytes();
                         session.recv(pickUpConnections[processIdx], buffer->data(), buffer->getSizeInBytes());
                     }
                     else if (bufferCoverTargetNum > 0)
@@ -663,6 +678,7 @@ void CacheFormatter::unformat(TransferSession& session)
                             + remainNoCoverTargetNum; // caches.at(recvBufferIdx) is allocated by cudaMalloc
                         llmRequest.updateKvCacheSize((*recvSplitCaches.at(recvBufferIdx)).getSizeInBytes());
                         auto& buffer = recvSplitCaches.at(recvBufferIdx);
+                        size = buffer->getSizeInBytes();
                         session.recv(pickUpConnections[processIdx], buffer->data(), buffer->getSizeInBytes());
                         bufferManager.copy(*recvSplitCaches.at(recvBufferIdx), *recvSplitCaches[processIdx]);
                         bufferManager.getStream().synchronize();
@@ -679,6 +695,7 @@ void CacheFormatter::unformat(TransferSession& session)
                             auto recvSlice = runtime::ITensor::slice(preAllocRecvBuffer, 0, recvSize);
                             auto copySlice = runtime::ITensor::slice(
                                 recvSplitCaches[processIdx], targetBufferSize - remainRecvSize, recvSize);
+                            size += recvSlice->getSizeInBytes();
                             llmRequest.updateKvCacheSize((*recvSlice).getSizeInBytes());
                             session.recv(pickUpConnections[processIdx], recvSlice->data(), recvSlice->getSizeInBytes());
                             bufferManager.copy(*recvSlice, *copySlice);
@@ -687,6 +704,15 @@ void CacheFormatter::unformat(TransferSession& session)
                         }
                     }
                 }
+                auto endTime = std::chrono::steady_clock::now();
+                double delay = 0.0;
+                if (recordDelay)
+                {
+                    delay = std::chrono::duration<double, std::milli>(startTime - arrivalTime).count();
+                }
+                double cacheTransferTime
+                    = std::max(0.0, std::chrono::duration<double, std::milli>(endTime - startTime).count());
+                kvCacheMeasureHelper.appendKVCacheTransfer(ctxReqId, delay, cacheTransferTime, size);
             };
             if (pickUpConnections.size() > 1)
             {
@@ -814,6 +840,8 @@ void CacheFormatter::unformat(TransferSession& session)
     if (selfConfig.getModelConfig().mNbKvHeadsPerLayer.size() != destConfig.getModelConfig().mNbKvHeadsPerLayer.size())
     {
         TLLM_LOG_WARNING("CacheFormatter::inquireSupport: only support same number of layers");
+        TLLM_LOG_WARNING("self: %zu dest %zu", selfConfig.getModelConfig().mNbKvHeadsPerLayer.size(),
+            destConfig.getModelConfig().mNbKvHeadsPerLayer.size());
         return false;
     }
     int selfNumLayers = selfConfig.getModelConfig().mNbKvHeadsPerLayer.size();
 
@@ -76,6 +76,15 @@ class BaseCacheFormatter
 
     /// @brief Destructor.
     virtual ~BaseCacheFormatter() = default;
+
+    // TODO: better way for context/generation tagging
+    void markAsSender(bool isSender) // forwards the sender/receiver flag to kvCacheMeasureHelper
+    {
+        kvCacheMeasureHelper.markAsSender(isSender);
+    }
+
+protected:
+    KvCacheMeasureHelper kvCacheMeasureHelper{common::getEnvKVCacheTransferOutputPath()};
 };
 
 // Simple cache block copy. Because it does not involve data splitting or merging, it performs best when the
@@ -115,7 +124,6 @@ class CacheFormatter final : public BaseCacheFormatter
 private:
     BaseKVCacheManager* mCacheManager;
     CacheTransBufferManager* mCacheTransBufferManager;
-    KvCacheMeasureHelper kvCacheMeasureHelper{common::getEnvKVCacheTransferOutputPath()};
 };
 
 std::unique_ptr<BaseCacheFormatter> createCacheFormatter(
 
@@ -269,12 +269,24 @@ class DataRequester
 class KvCacheMeasureHelper
 {
 public:
+    struct Measure // one recorded KV cache transfer sample, stored per request ID
+    {
+        double delay;     // from last token (ctx) or arrival time (gen) until the transfer started, in ms
+        double duration;  // elapsed time of the transfer itself, in ms
+        double bandwidth; // size * 8 / duration, expressed in Gbps
+    };
+
     KvCacheMeasureHelper(std::string output_path)
         : mOutputPath(std::move(output_path))
     {
     }
 
-    void appendKVCacheTransfer(LlmRequest::RequestIdType requestId, double duration, size_t size)
+    void markAsSender(bool isSender) // stores the role; the destructor uses it for the "send"/"recv" file suffix
+    {
+        mIsSender = isSender;
+    }
+
+    void appendKVCacheTransfer(LlmRequest::RequestIdType requestId, double delay, double duration, size_t size)
     {
         auto bandwidth = size * 8 / (duration / 1000) / 1e9;
         if (mOutputPath.empty())
@@ -283,15 +295,17 @@ class KvCacheMeasureHelper
         }
 
         std::lock_guard<std::mutex> lock(mMutex);
-        mRequestKVCacheTranfserMeasure[requestId].emplace_back(duration, bandwidth);
+        mRequestKVCacheTranfserMeasure[requestId].emplace_back(Measure{delay, duration, bandwidth});
     }
 
     ~KvCacheMeasureHelper()
     {
         if (!mRequestKVCacheTranfserMeasure.empty() && !mOutputPath.empty())
         {
+            TLLM_CHECK(mIsSender.has_value());
             auto rank = mpi::MpiComm::world().getRank();
-            std::string outFilePath = mOutputPath + "rank_" + std::to_string(rank) + ".txt";
+            std::string outFilePath
+                = mOutputPath + "rank_" + std::to_string(rank) + "_" + (mIsSender.value() ? "send" : "recv") + ".csv";
             std::ofstream outFile(outFilePath);
 
             TLLM_CHECK_WITH_INFO(outFile.is_open(), "Cannot write to file " + outFilePath);
@@ -301,17 +315,17 @@ class KvCacheMeasureHelper
             outFile << "RequestID";
             for (size_t i = 0; i < numTransferMeasure; i++)
             {
-                outFile << ",TimeDuration,Bandwidth";
+                outFile << ",Delay(ms),Duration(ms),Bandwidth(Gbps)";
             }
             outFile << '\n';
 
             for (auto const& [requestID, measures] : mRequestKVCacheTranfserMeasure)
             {
                 outFile << requestID;
 
-                for (auto const& [time, bandwidth] : measures)
+                for (auto const& measure : measures)
                 {
-                    outFile << "," << time << "," << bandwidth;
+                    outFile << "," << measure.delay << "," << measure.duration << "," << measure.bandwidth;
                 }
                 outFile << '\n';
             }
@@ -321,9 +335,10 @@ class KvCacheMeasureHelper
     }
 
 private:
-    std::map<LlmRequest::RequestIdType, std::vector<std::pair<double, double>>> mRequestKVCacheTranfserMeasure;
+    std::map<LlmRequest::RequestIdType, std::vector<Measure>> mRequestKVCacheTranfserMeasure;
     std::string mOutputPath;
     std::mutex mMutex;
+    std::optional<bool> mIsSender;
 };
 
 } // namespace tensorrt_llm::batch_manager
@@ -39,6 +39,7 @@ DataSenderImpl::DataSenderImpl(executor::kv_cache::ConnectionManager* manager,
 {
     TLLM_CHECK(mManager);
     TLLM_CHECK(mManager->getCommState().getSelfIdx() == selfIndex);
+    mFormatter->markAsSender(true);
 }
 
 [[nodiscard]] RequestInfo DataSenderImpl::recvRequestInfo()
@@ -136,6 +137,7 @@ DataReceiverImpl::DataReceiverImpl(executor::kv_cache::ConnectionManager* manage
     TLLM_CHECK(mManager);
     TLLM_CHECK(mManager->getCommState().getSelfIdx() == selfIndex);
     TLLM_CHECK(mFormatter);
+    mFormatter->markAsSender(false);
 }
 
 TransferSession DataReceiverImpl::sendRequestInfo(LlmRequest const& llmRequest)
Original file line number	Diff line number	Diff line change
`@@ -269,12 +269,24 @@ class DataRequester`
`269`	`269`	`class KvCacheMeasureHelper`
`270`	`270`	`{`
`271`	`271`	`public:`
	`272`	`+ struct Measure`
	`273`	`+ {`
	`274`	`+ double delay; // from last token (ctx) or arrival time (gen), in ms`
	`275`	`+ double duration; // in ms`
	`276`	`+ double bandwidth; // in Gbps`
	`277`	`+ };`
	`278`	`+`
`272`	`279`	`KvCacheMeasureHelper(std::string output_path)`
`273`	`280`	`: mOutputPath(std::move(output_path))`
`274`	`281`	`{`
`275`	`282`	`}`
`276`	`283`
`277`		`- void appendKVCacheTransfer(LlmRequest::RequestIdType requestId, double duration, size_t size)`
	`284`	`+ void markAsSender(bool isSender)`
	`285`	`+ {`
	`286`	`+ mIsSender = isSender;`
	`287`	`+ }`
	`288`	`+`
	`289`	`+ void appendKVCacheTransfer(LlmRequest::RequestIdType requestId, double delay, double duration, size_t size)`
`278`	`290`	`{`
`279`	`291`	`auto bandwidth = size * 8 / (duration / 1000) / 1e9;`
`280`	`292`	`if (mOutputPath.empty())`
`@@ -283,15 +295,17 @@ class KvCacheMeasureHelper`
`283`	`295`	`}`
`284`	`296`
`285`	`297`	`std::lock_guard<std::mutex> lock(mMutex);`
`286`		`- mRequestKVCacheTranfserMeasure[requestId].emplace_back(duration, bandwidth);`
	`298`	`+ mRequestKVCacheTranfserMeasure[requestId].emplace_back(Measure{delay, duration, bandwidth});`
`287`	`299`	`}`
`288`	`300`
`289`	`301`	`~KvCacheMeasureHelper()`
`290`	`302`	`{`
`291`	`303`	`if (!mRequestKVCacheTranfserMeasure.empty() && !mOutputPath.empty())`
`292`	`304`	`{`
	`305`	`+ TLLM_CHECK(mIsSender.has_value());`
`293`	`306`	`auto rank = mpi::MpiComm::world().getRank();`
`294`		`- std::string outFilePath = mOutputPath + "rank_" + std::to_string(rank) + ".txt";`
	`307`	`+ std::string outFilePath`
	`308`	`+ = mOutputPath + "rank_" + std::to_string(rank) + "_" + (mIsSender.value() ? "send" : "recv") + ".csv";`
`295`	`309`	`std::ofstream outFile(outFilePath);`
`296`	`310`
`297`	`311`	`TLLM_CHECK_WITH_INFO(outFile.is_open(), "Cannot write to file " + outFilePath);`
`@@ -301,17 +315,17 @@ class KvCacheMeasureHelper`
`301`	`315`	`outFile << "RequestID";`
`302`	`316`	`for (size_t i = 0; i < numTransferMeasure; i++)`
`303`	`317`	`{`
`304`		`- outFile << ",TimeDuration,Bandwidth";`
	`318`	`+ outFile << ",Delay(ms),Duration(ms),Bandwidth(Gbps)";`
`305`	`319`	`}`
`306`	`320`	`outFile << '\n';`
`307`	`321`
`308`	`322`	`for (auto const& [requestID, measures] : mRequestKVCacheTranfserMeasure)`
`309`	`323`	`{`
`310`	`324`	`outFile << requestID;`
`311`	`325`
`312`		`- for (auto const& [time, bandwidth] : measures)`
	`326`	`+ for (auto const& measure : measures)`
`313`	`327`	`{`
`314`		`- outFile << "," << time << "," << bandwidth;`
	`328`	`+ outFile << "," << measure.delay << "," << measure.duration << "," << measure.bandwidth;`
`315`	`329`	`}`
`316`	`330`	`outFile << '\n';`
`317`	`331`	`}`
`@@ -321,9 +335,10 @@ class KvCacheMeasureHelper`
`321`	`335`	`}`
`322`	`336`
`323`	`337`	`private:`
`324`		`- std::map<LlmRequest::RequestIdType, std::vector<std::pair<double, double>>> mRequestKVCacheTranfserMeasure;`
	`338`	`+ std::map<LlmRequest::RequestIdType, std::vector<Measure>> mRequestKVCacheTranfserMeasure;`
`325`	`339`	`std::string mOutputPath;`
`326`	`340`	`std::mutex mMutex;`
	`341`	`+ std::optional<bool> mIsSender;`
`327`	`342`	`};`
`328`	`343`
`329`	`344`	`} // namespace tensorrt_llm::batch_manager`
Original file line number	Diff line number	Diff line change
`@@ -39,6 +39,7 @@ DataSenderImpl::DataSenderImpl(executor::kv_cache::ConnectionManager* manager,`
`39`	`39`	`{`
`40`	`40`	`TLLM_CHECK(mManager);`
`41`	`41`	`TLLM_CHECK(mManager->getCommState().getSelfIdx() == selfIndex);`
	`42`	`+ mFormatter->markAsSender(true);`
`42`	`43`	`}`
`43`	`44`
`44`	`45`	`[[nodiscard]] RequestInfo DataSenderImpl::recvRequestInfo()`
`@@ -136,6 +137,7 @@ DataReceiverImpl::DataReceiverImpl(executor::kv_cache::ConnectionManager* manage`
`136`	`137`	`TLLM_CHECK(mManager);`
`137`	`138`	`TLLM_CHECK(mManager->getCommState().getSelfIdx() == selfIndex);`
`138`	`139`	`TLLM_CHECK(mFormatter);`
	`140`	`+ mFormatter->markAsSender(false);`
`139`	`141`	`}`
`140`	`142`
`141`	`143`	`TransferSession DataReceiverImpl::sendRequestInfo(LlmRequest const& llmRequest)`