@@ -718,6 +718,7 @@ func (qjm *XController) addTotalSnapshotResourcesConsumedByAw(totalgpu int32, to
718718
719719func (qjm * XController ) getAggregatedAvailableResourcesPriority (unallocatedClusterResources * clusterstateapi.
720720 Resource , targetpr float64 , requestingJob * arbv1.AppWrapper , agentId string ) (* clusterstateapi.Resource , []* arbv1.AppWrapper ) {
721+ // Get the available free resources in the cluster (working on a copy).
721722 r := unallocatedClusterResources .Clone ()
722723 // Track preemption resources
723724 preemptable := clusterstateapi .EmptyResource ()
@@ -732,7 +733,10 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust
732733 klog .Errorf ("[getAggAvaiResPri] Unable to obtain the list of queueJobs %+v" , err )
733734 return r , nil
734735 }
735-
736+ // For all AWs whose canRun status is true:
737+ // in non-preemption mode, we reserve resources for these AWs.
738+ // Reserving is done by subtracting the resources of pods owned by the AW that are running or completed from the total AW resources.
739+ // An AW can be running while items owned by it are completed, or there might be a new set of pods yet to be spawned.
736740 for _ , value := range queueJobs {
737741 klog .V (10 ).Infof ("[getAggAvaiResPri] %s: Evaluating job: %s to calculate aggregated resources." , time .Now ().String (), value .Name )
738742 if value .Name == requestingJob .Name {
@@ -797,10 +801,11 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust
797801
798802 totalResource := qjm .addTotalSnapshotResourcesConsumedByAw (value .Status .TotalGPU , value .Status .TotalCPU , value .Status .TotalMemory )
799803 klog .V (6 ).Infof ("[getAggAvaiResPri] total resources consumed by Appwrapper %v when CanRun are %v" , value .Name , totalResource )
800- pending , err = qjv .NonNegSub (totalResource )
804+ delta , err := qjv .NonNegSub (totalResource )
805+ pending = pending .Add (delta )
801806 if err != nil {
802807 klog .Warningf ("[getAggAvaiResPri] Subtraction of resources failed, adding entire appwrapper resoources %v, %v" , qjv , err )
803- pending = qjv
808+ pending = pending . Add ( qjv )
804809 }
805810 klog .V (6 ).Infof ("[getAggAvaiResPri] The value of pending is %v" , pending )
806811 continue
0 commit comments