Skip to content

Commit 697d0ff

Browse files
committed
simplifying
1 parent 333f2e1 commit 697d0ff

File tree

1 file changed

+38
-81
lines changed

1 file changed

+38
-81
lines changed

scheduler/scheduler_system.go

Lines changed: 38 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -333,29 +333,6 @@ func (s *SystemScheduler) computeJobAllocs() error {
333333
// find feasible nodes for all the task groups
334334
s.feasibleNodesForTG = s.findFeasibleNodesForTG(reconciliationResult.Update)
335335

336-
// any further logic depends on whether we're canarying or not
337-
isCanarying := map[string]bool{}
338-
if s.job != nil && s.deployment != nil {
339-
for _, tg := range s.job.TaskGroups {
340-
dstate, ok := s.deployment.TaskGroups[tg.Name]
341-
if !ok {
342-
continue
343-
}
344-
// a system job is canarying if:
345-
// - it has a non-empty update block (just a sanity check, all
346-
// submitted jobs should have a non-empty update block as part of
347-
// canonicalization)
348-
// - canary parameter in the update block has to be positive
349-
// - deployment has to be non-nil and it cannot have been promoted
350-
// - this cannot be the initial job version
351-
isCanarying[tg.Name] = !tg.Update.IsEmpty() &&
352-
tg.Update.Canary > 0 &&
353-
dstate != nil &&
354-
!dstate.Promoted &&
355-
s.job.Version != 0
356-
}
357-
}
358-
359336
// Treat non in-place updates as an eviction and new placement, which will
360337
// be limited by max_parallel
361338
s.limitReached = evictAndPlace(s.ctx, s.job, reconciliationResult, sstructs.StatusAllocUpdating)
@@ -398,45 +375,45 @@ func (s *SystemScheduler) computeJobAllocs() error {
398375
continue
399376
}
400377

378+
// we can set the desired total now, it's always the amount of all
379+
// feasible nodes
401380
s.deployment.TaskGroups[tg.Name].DesiredTotal = len(feasibleNodes)
402381

382+
dstate, ok := s.deployment.TaskGroups[tg.Name]
383+
if !ok {
384+
continue
385+
}
386+
// a system job is canarying if:
387+
// - it has a non-empty update block (just a sanity check, all
388+
// submitted jobs should have a non-empty update block as part of
389+
// canonicalization)
390+
// - canary parameter in the update block has to be positive
391+
// - deployment has to be non-nil and it cannot have been promoted
392+
// - this cannot be the initial job version
393+
isCanarying := !tg.Update.IsEmpty() &&
394+
tg.Update.Canary > 0 &&
395+
dstate != nil &&
396+
!dstate.Promoted &&
397+
s.job.Version != 0
398+
403399
// if this TG isn't canarying, we're done
404-
if !isCanarying[tg.Name] {
400+
if !isCanarying {
405401
continue
406402
}
407403

404+
// we can now also set the desired canaries: it's the tg.Update.Canary
405+
// percent of all the feasible nodes, rounded up to the nearest int
406+
requiredCanaries := int(math.Ceil(float64(tg.Update.Canary) * float64(len(feasibleNodes)) / 100))
407+
s.deployment.TaskGroups[tg.Name].DesiredCanaries = requiredCanaries
408+
408409
// Initially, if the job requires canaries, we place all of them on
409410
// all eligible nodes. At this point we know which nodes are
410411
// feasible, so we evict unneeded canaries.
411-
placedCanaries, err := s.evictUnneededCanaries(
412-
len(s.feasibleNodesForTG[tg.Name]),
413-
tg.Update.Canary,
414-
)
412+
placedCanaries, err := s.evictUnneededCanaries(requiredCanaries)
415413
if err != nil {
416414
return fmt.Errorf("failed to evict canaries for job '%s': %v", s.eval.JobID, err)
417415
}
418-
419-
s.deployment.TaskGroups[tg.Name].DesiredCanaries = len(placedCanaries)
420416
s.deployment.TaskGroups[tg.Name].PlacedCanaries = placedCanaries
421-
422-
for nodeID, allocs := range s.plan.NodeUpdate {
423-
filtered := []*structs.Allocation{}
424-
for _, a := range allocs {
425-
if a.DesiredDescription == sstructs.StatusAllocNotNeeded {
426-
filtered = append(filtered, a)
427-
continue
428-
}
429-
430-
// we only keep allocs that are in the plan.NodeAllocation for
431-
// this node
432-
if _, ok := s.plan.NodeAllocation[a.NodeID]; ok {
433-
filtered = append(filtered, a)
434-
}
435-
}
436-
437-
s.plan.NodeUpdate[nodeID] = filtered
438-
}
439-
440417
}
441418

442419
// check if the deployment is complete
@@ -543,38 +520,26 @@ func (s *SystemScheduler) computePlacements(
543520
deploymentID = s.deployment.ID
544521
}
545522

546-
nodes := make([]*structs.Node, 1)
547523
for _, missing := range reconcilerResult.Place {
548524
tgName := missing.TaskGroup.Name
549525

550-
var option *feasible.RankedNode
551-
552526
node, ok := nodeByID[missing.Alloc.NodeID]
553527
if !ok {
554528
s.logger.Debug("could not find node", "node", missing.Alloc.NodeID)
555529
continue
556530
}
557531

558-
// if we've already seen a feasible node for this tg, skip feasibility
559-
// checks for it
532+
// we've already performed feasibility checks for all the task groups and
533+
// nodes, so look them up
534+
var option *feasible.RankedNode
560535
optionsForTG := s.feasibleNodesForTG[tgName]
561-
if existing := slices.IndexFunc(optionsForTG, func(rn *feasible.RankedNode) bool { return rn.Node == node }); existing != -1 {
536+
if existing := slices.IndexFunc(
537+
optionsForTG,
538+
func(rn *feasible.RankedNode) bool { return rn.Node == node },
539+
); existing != -1 {
562540
option = optionsForTG[existing]
563541
}
564542

565-
// if we couldn't find any in the feasible nodes map, perform a
566-
// feasibility check for that node and TG
567-
if option == nil {
568-
// Update the set of placement nodes
569-
nodes[0] = node
570-
s.stack.SetNodes(nodes)
571-
572-
// Attempt to match the task group
573-
option = s.stack.Select(missing.TaskGroup, &feasible.SelectOptions{AllocName: missing.Name})
574-
}
575-
576-
// if we're still getting nothing, it means we don't have a feasible
577-
// node
578543
if option == nil {
579544
// If the task can't be placed on this node, update reporting data
580545
// and continue to short circuit the loop
@@ -804,20 +769,17 @@ func evictAndPlace(ctx feasible.Context, job *structs.Job, reconciled *reconcile
804769

805770
// evictUnneededCanaries checks how many canaries are needed against the amount
806771
// of feasible nodes, and removes unnecessary placements from the plan.
807-
func (s *SystemScheduler) evictUnneededCanaries(feasibleNodes int, canary int) ([]string, error) {
772+
func (s *SystemScheduler) evictUnneededCanaries(requiredCanaries int) ([]string, error) {
808773

809774
desiredCanaries := []string{} // FIXME: make this better
810775

811-
// calculate how many canary placement we expect each task group to have: it
812-
// should be the tg.update.canary percentage of eligible nodes, rounded up
813-
// to the nearest integer
814-
requiredCanaries := int(math.Ceil(float64(canary) * float64(feasibleNodes) / 100))
815-
816776
// no canaries to consider, quit early
817777
if requiredCanaries == 0 {
818778
return desiredCanaries, nil
819779
}
820780

781+
canaryCounter := requiredCanaries
782+
821783
// iterate over node allocations to find canary allocs
822784
for node, allocations := range s.plan.NodeAllocation {
823785
n := 0
@@ -826,8 +788,8 @@ func (s *SystemScheduler) evictUnneededCanaries(feasibleNodes int, canary int) (
826788
continue
827789
}
828790
if alloc.DeploymentStatus.Canary {
829-
if requiredCanaries != 0 {
830-
requiredCanaries -= 1
791+
if canaryCounter != 0 {
792+
canaryCounter -= 1
831793

832794
desiredCanaries = append(desiredCanaries, alloc.ID)
833795
allocations[n] = alloc // we do this in order to avoid allocating another slice
@@ -844,11 +806,6 @@ func (s *SystemScheduler) evictUnneededCanaries(feasibleNodes int, canary int) (
844806
func (s *SystemScheduler) isDeploymentComplete(groupName string, buckets *reconciler.NodeReconcileResult, isCanarying bool) bool {
845807
complete := len(buckets.Place)+len(buckets.Migrate)+len(buckets.Update) == 0
846808

847-
// fmt.Printf("complete? %v\n", complete)
848-
// fmt.Printf("buckets.Place: %v buckets.Migrate: %v buckets.Update: %v\n", len(buckets.Place), len(buckets.Migrate), len(buckets.Update))
849-
// fmt.Printf("s.deployment == nil? %v\n", s.deployment == nil)
850-
// fmt.Printf("isCanarying? %v\n", isCanarying)
851-
852809
if !complete || s.deployment == nil || isCanarying {
853810
return false
854811
}

0 commit comments

Comments
 (0)