@@ -39,25 +39,31 @@ TensorPtr collectRequestIds(RequestVector const& contextRequests, RequestVector
39
39
return requestIds;
40
40
}
41
41
42
- void sortRequests (RequestVector& contextRequests, RequestVector& generationRequests)
42
+ void sortRequests (RequestVector& contextRequests, RequestVector& generationRequests, bool chunksPresent )
43
43
{
44
44
TLLM_LOG_TRACE (" %s start" , __PRETTY_FUNCTION__);
45
45
46
- // Move context requests that reached the last context chunk to the end of the vector.
47
- // This order is required for moveFinishedContextRequestsToGeneration.
48
- auto firstFinished = std::partition (contextRequests.begin (), contextRequests.end (),
49
- [](auto const & llmReq) { return !llmReq->isLastContextChunk (); });
50
-
51
46
auto sortByLoraId = [](RequestVector::iterator begin, RequestVector::iterator end)
52
47
{
53
48
std::sort (
54
49
begin, end, [](auto const & lhs, auto const & rhs) { return lhs->getLoraTaskId () < rhs->getLoraTaskId (); });
55
50
};
56
51
57
- // Sort context requests by lora task id, but keep finished requests separate.
58
- sortByLoraId (contextRequests.begin (), firstFinished);
59
- sortByLoraId (firstFinished, contextRequests.end ());
60
- // Sort generation requests by lora task id.
52
+ if (chunksPresent)
53
+ {
54
+ // Move context requests that reached the last context chunk to the end of the vector.
55
+ // This order is required for moveFinishedContextRequestsToGeneration.
56
+ auto firstFinished = std::partition (contextRequests.begin (), contextRequests.end (),
57
+ [](auto const & llmReq) { return !llmReq->isLastContextChunk (); });
58
+
59
+ // Sort context requests by lora task id, but keep finished requests separate.
60
+ sortByLoraId (contextRequests.begin (), firstFinished);
61
+ sortByLoraId (firstFinished, contextRequests.end ());
62
+ }
63
+ else
64
+ {
65
+ sortByLoraId (contextRequests.begin (), contextRequests.end ());
66
+ }
61
67
sortByLoraId (generationRequests.begin (), generationRequests.end ());
62
68
63
69
TLLM_LOG_TRACE (" %s stop" , __PRETTY_FUNCTION__);
0 commit comments