
Commit b6d1784

haircommander authored and bertinatto committed
UPSTREAM: <carry>: disable load balancing on created cgroups when managed is enabled
Previously, cpu load balancing was enabled in cri-o by manually changing the sched_domain of cpus in sysfs. However, RHEL 9 dropped support for this knob, instead requiring it be changed in cgroups directly. To disable cpu load balancing on cgroupv1, the specified cgroup must have cpuset.sched_load_balance set to 0, as well as all of that cgroup's parents, plus all of the cgroups that contain a subset of the cpus that load balancing is disabled for.

By default, all cpusets inherit their cpus from their parent and have sched_load_balance set to 1. Since the cpus that need load balancing disabled must remain in the root cgroup, all slices will inherit the full cpuset. Rather than rebalancing every cgroup whenever a new guaranteed cpuset cgroup is created, the approach this PR takes is to disable load balancing for all slices. Since slices definitionally don't have any processes in them, setting load balancing won't affect the kernel's actual scheduling decisions. All it does is open the opportunity for CRI-O to actually disable load balancing for containers that request it.

Signed-off-by: Peter Hunt <[email protected]>

UPSTREAM: <carry>: kubelet/cm: disable cpu load balancing on slices when using static cpu manager policy

There are situations where disabling cpu load balancing is desired while the kubelet is not in the managed state. Instead of using that condition, set the cpu load balancing parameter for new slices whenever the cpu manager policy is static.

Signed-off-by: Peter Hunt <[email protected]>
1 parent 9c0f022 commit b6d1784
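
For context, a minimal sketch of the cgroupv1 rule the commit message describes: the knob has to be cleared on the target cpuset cgroup and on every ancestor up to the hierarchy root. The helper below is a hypothetical illustration, not part of this commit:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// disableLoadBalanceUpward writes "0" to cpuset.sched_load_balance on the
// given cgroupv1 cpuset directory and on each of its parents up to the
// cpuset mount root, mirroring the requirement described above.
// cgroupPath must be a descendant of mountRoot (e.g. /sys/fs/cgroup/cpuset).
func disableLoadBalanceUpward(mountRoot, cgroupPath string) error {
	if cgroupPath != mountRoot && !strings.HasPrefix(cgroupPath, mountRoot+string(filepath.Separator)) {
		return fmt.Errorf("%s is not under cpuset mount %s", cgroupPath, mountRoot)
	}
	for p := cgroupPath; ; p = filepath.Dir(p) {
		knob := filepath.Join(p, "cpuset.sched_load_balance")
		if err := os.WriteFile(knob, []byte("0"), 0o644); err != nil {
			return fmt.Errorf("disabling load balancing at %s: %w", p, err)
		}
		if p == mountRoot {
			return nil
		}
	}
}

On cgroupv2 the sched_load_balance file does not exist at all, which is why the kubelet change below only takes effect on cgroupv1.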

File tree

4 files changed, +30 -0 lines changed

pkg/kubelet/cm/cgroup_manager_linux.go

Lines changed: 22 additions & 0 deletions
@@ -27,6 +27,7 @@ import (
 	"sync"
 	"time"
 
+	"github.com/opencontainers/runc/libcontainer/cgroups"
 	libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/cgroups/manager"
@@ -150,6 +151,10 @@ type cgroupManagerImpl struct {
 
 	// useSystemd tells if systemd cgroup manager should be used.
 	useSystemd bool
+
+	// cpuLoadBalanceDisable tells whether kubelet should disable
+	// cpu load balancing on new cgroups it creates.
+	cpuLoadBalanceDisable bool
 }
 
 // Make sure that cgroupManagerImpl implements the CgroupManager interface
@@ -476,6 +481,19 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
 		utilruntime.HandleError(fmt.Errorf("cgroup manager.Set failed: %w", err))
 	}
 
+	// Disable cpuset.sched_load_balance for all cgroups Kubelet creates.
+	// This way, CRI can disable sched_load_balance for pods that must have load balance
+	// disabled, but the slices can contain all cpus (as the guaranteed cpus are known dynamically).
+	if m.cpuLoadBalanceDisable && !libcontainercgroups.IsCgroup2UnifiedMode() {
+		path := manager.Path("cpuset")
+		if path == "" {
+			return fmt.Errorf("Failed to find cpuset for newly created cgroup")
+		}
+		if err := cgroups.WriteFile(path, "cpuset.sched_load_balance", "0"); err != nil {
+			return err
+		}
+	}
+
 	return nil
 }
 
@@ -747,3 +765,7 @@ func (m *cgroupManagerImpl) SetCgroupConfig(name CgroupName, resource v1.Resourc
 	}
 	return nil
 }
+
+func (m *cgroupManagerImpl) SetCPULoadBalanceDisable() {
+	m.cpuLoadBalanceDisable = true
+}
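
As a sanity check on the hunk above, the value can be read back with runc's ReadFile helper, which lives in the same libcontainer/cgroups package as the WriteFile call used in Create(); the wrapper below is a hypothetical sketch, not part of the commit:

import (
	"strings"

	"github.com/opencontainers/runc/libcontainer/cgroups"
)

// verifyLoadBalanceDisabled reports whether the cpuset cgroup at dir has
// load balancing disabled, i.e. whether Create() wrote the expected "0".
func verifyLoadBalanceDisabled(dir string) (bool, error) {
	val, err := cgroups.ReadFile(dir, "cpuset.sched_load_balance")
	if err != nil {
		return false, err
	}
	return strings.TrimSpace(val) == "0", nil
}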

pkg/kubelet/cm/cgroup_manager_unsupported.go

Lines changed: 3 additions & 0 deletions
@@ -89,6 +89,9 @@ func (m *unsupportedCgroupManager) SetCgroupConfig(name CgroupName, resource v1.
 	return errNotSupported
 }
 
+func (m *unsupportedCgroupManager) SetCPULoadBalanceDisable() {
+}
+
 var RootCgroupName = CgroupName([]string{})
 
 func NewCgroupName(base CgroupName, components ...string) CgroupName {

pkg/kubelet/cm/container_manager_linux.go

Lines changed: 3 additions & 0 deletions
@@ -248,6 +248,9 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
 	// Turn CgroupRoot from a string (in cgroupfs path format) to internal CgroupName
 	cgroupRoot := ParseCgroupfsToCgroupName(nodeConfig.CgroupRoot)
 	cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
+	if nodeConfig.CPUManagerPolicy == string(cpumanager.PolicyStatic) {
+		cgroupManager.SetCPULoadBalanceDisable()
+	}
 	// Check if Cgroup-root actually exists on the node
 	if nodeConfig.CgroupsPerQOS {
 		// this does default to / when enabled, but this tests against regressions.
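
One ordering detail is worth noting: Create() consults the flag, so SetCPULoadBalanceDisable() must run before the kubelet creates any cgroups, which is why the hunk above sits right after the manager is constructed and before the QoS hierarchy is built. A condensed, illustrative sketch of that sequence (the error handling and the "kubepods" name are abbreviations, not the literal surrounding code):

// With --cpu-manager-policy=static, every slice created after this
// point gets cpuset.sched_load_balance=0 on cgroupv1.
cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
cgroupManager.SetCPULoadBalanceDisable() // must precede any Create() calls

// e.g. the top-level pods cgroup is then created with the knob cleared:
if err := cgroupManager.Create(&CgroupConfig{Name: NewCgroupName(RootCgroupName, "kubepods")}); err != nil {
	return nil, err
}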

pkg/kubelet/cm/types.go

Lines changed: 2 additions & 0 deletions
@@ -88,6 +88,8 @@ type CgroupManager interface {
 	GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error)
 	// Set resource config for the specified resource type on the cgroup
 	SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error
+	// Toggle whether CPU load balancing should be disabled for new cgroups the kubelet creates
+	SetCPULoadBalanceDisable()
 }
 
 // QOSContainersInfo stores the names of containers per qos
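
Because the method is part of the CgroupManager interface, every implementation must provide it, including test fakes; a no-op like the unsupported manager's stub above is sufficient. A sketch of a hypothetical fake:

// fakeCgroupManager is a hypothetical test double; the remaining
// CgroupManager interface methods are elided.
type fakeCgroupManager struct {
	loadBalanceDisabled bool
}

// SetCPULoadBalanceDisable records that the kubelet asked for load
// balancing to be disabled on new cgroups.
func (f *fakeCgroupManager) SetCPULoadBalanceDisable() {
	f.loadBalanceDisabled = true
}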
