Skip to content
This repository was archived by the owner on Apr 17, 2025. It is now read-only.

Commit db2831b

Browse files
committed
Add liveness and startup/readiness probes
This adds liveness and startup/readiness probes to the HNC Deployment, using functionality recently added to controller-runtime. The default probe configuration (timeouts etc.) will probably need adjusting to work without issues for a typical HNC deployment, and it should be tested in a live cluster, which I do not have available. Testing: all tests ran successfully on my local workstation when using the simple ping probe. However, after changing to the deeper webhook probe, the container never becomes ready. This seems to be an issue related to cert-rotator, which I am not familiar with (we use cert-manager). On request, Adrian Ludwin took a look at my issues, and the webhook probe seems to work for him.
1 parent 281b24f commit db2831b

File tree

2 files changed

+32
-5
lines changed

2 files changed

+32
-5
lines changed

cmd/manager/main.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ var (
5151
)
5252

5353
var (
54+
probeAddr string
5455
metricsAddr string
5556
enableStackdriver bool
5657
maxReconciles int
@@ -89,6 +90,7 @@ func main() {
8990
metricsCleanupFn := enableMetrics()
9091
defer metricsCleanupFn()
9192
mgr := createManager()
93+
setupChecks(mgr)
9294

9395
// Make sure certs are generated and valid if webhooks are enabled and internal certs are used.
9496
setupLog.Info("Starting certificate generation")
@@ -112,6 +114,7 @@ func main() {
112114

113115
func parseFlags() {
114116
setupLog.Info("Parsing flags")
117+
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
115118
flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
116119
flag.BoolVar(&enableStackdriver, "enable-stackdriver", true, "If true, export metrics to stackdriver")
117120
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
@@ -217,11 +220,12 @@ func createManager() ctrl.Manager {
217220
// it turns out to be harmful.
218221
cfg.Burst = int(cfg.QPS * 1.5)
219222
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
220-
Scheme: scheme,
221-
MetricsBindAddress: metricsAddr,
222-
LeaderElection: enableLeaderElection,
223-
LeaderElectionID: leaderElectionId,
224-
Port: webhookServerPort,
223+
Scheme: scheme,
224+
MetricsBindAddress: metricsAddr,
225+
HealthProbeBindAddress: probeAddr,
226+
LeaderElection: enableLeaderElection,
227+
LeaderElectionID: leaderElectionId,
228+
Port: webhookServerPort,
225229
})
226230
if err != nil {
227231
setupLog.Error(err, "unable to create manager")
@@ -230,6 +234,17 @@ func createManager() ctrl.Manager {
230234
return mgr
231235
}
232236

237+
// setupChecks registers the webhook server's StartedChecker on mgr twice: once
// as the "healthz" check and once as the "readyz" check. If either
// registration returns an error, the error is logged via setupLog and the
// process exits with status 1.
func setupChecks(mgr ctrl.Manager) {
238+
if err := mgr.AddHealthzCheck("healthz", mgr.GetWebhookServer().StartedChecker()); err != nil {
239+
setupLog.Error(err, "unable to set up health check")
240+
os.Exit(1)
241+
}
242+
if err := mgr.AddReadyzCheck("readyz", mgr.GetWebhookServer().StartedChecker()); err != nil {
243+
setupLog.Error(err, "unable to set up ready check")
244+
os.Exit(1)
245+
}
246+
}
247+
233248
func startControllers(mgr ctrl.Manager, certsReady chan struct{}) {
234249
// The controllers won't work until the webhooks are operating, and those won't work until the
235250
// certs are all in place.

config/manager/manager.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ spec:
5050
- "--excluded-namespace=kube-node-lease"
5151
image: controller:latest
5252
name: manager
53+
livenessProbe:
54+
httpGet:
55+
path: /healthz
56+
port: 8081
57+
failureThreshold: 1
58+
periodSeconds: 10
59+
startupProbe:
60+
httpGet:
61+
path: /readyz
62+
port: 8081
63+
failureThreshold: 100
64+
periodSeconds: 5
5365
resources:
5466
limits:
5567
cpu: 100m

0 commit comments

Comments
 (0)