
Commit 9083969

feat: add three new etcd cold boot recovery tests
Add three new test cases to validate etcd cluster recovery from cold boot scenarios reached through different graceful/ungraceful shutdown combinations (GNS = graceful node shutdown, UGNS = ungraceful node shutdown):

- Cold boot from double GNS: both nodes gracefully shut down simultaneously, then both restart (full cluster cold boot)
- Cold boot from sequential GNS: first node gracefully shut down, then second node gracefully shut down, then both restart
- Cold boot from mixed GNS/UGNS: first node gracefully shut down, surviving node then ungracefully shut down, then both restart

Note: The inverse case (UGNS first node, then GNS second) is not tested because in TNF clusters an ungracefully shut down node is quickly recovered, which prevents waiting to gracefully shut down the second node later. The double UGNS scenario is already covered by existing tests.
Parent commit: 7bd72bf


test/extended/two_node/tnf_recovery.go

Lines changed: 187 additions & 20 deletions
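
The three shutdown sequences described in the commit message can be summarized as data. The following is a standalone Go sketch for orientation only; it is not part of the commit, and the scenario/shutdownStep types are illustrative stand-ins for the real tests shown in the diff below.

package main

import "fmt"

// shutdownStep describes how one node is taken down before the cold boot.
type shutdownStep struct {
	node     string
	graceful bool // true = graceful shutdown (GNS), false = ungraceful (UGNS)
}

// scenario lists the shutdown steps of one test; each test then restarts both
// nodes and waits for both etcd members to become healthy voting members.
type scenario struct {
	name  string
	steps []shutdownStep
}

func main() {
	scenarios := []scenario{
		{"cold boot from double GNS (simultaneous)", []shutdownStep{
			{"node A", true}, {"node B", true},
		}},
		{"cold boot from sequential GNS", []shutdownStep{
			{"first node", true}, {"second node", true},
		}},
		{"cold boot from mixed GNS/UGNS", []shutdownStep{
			{"first node", true}, {"surviving node", false},
		}},
	}

	for _, s := range scenarios {
		fmt.Println(s.name)
		for i, st := range s.steps {
			mode := "ungraceful"
			if st.graceful {
				mode = "graceful"
			}
			fmt.Printf("  %d. %s shutdown of %s\n", i+1, mode, st.node)
		}
		fmt.Println("  then: restart both nodes and wait for both etcd members to be healthy")
	}
}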
@@ -32,8 +32,9 @@ const (
 	memberPromotedVotingTimeout     = 15 * time.Minute
 	networkDisruptionDuration       = 15 * time.Second
 	vmRestartTimeout                = 5 * time.Minute
-	vmUngracefulShutdownTimeout     = 30 * time.Second // Ungraceful VM shutdown is typically fast
-	membersHealthyAfterDoubleReboot = 15 * time.Minute // It takes into account full VM recovering up to Etcd member healthy
+	vmUngracefulShutdownTimeout     = 30 * time.Second // Ungraceful shutdown is typically fast
+	vmGracefulShutdownTimeout       = 10 * time.Minute // Graceful shutdown is typically slow
+	membersHealthyAfterDoubleReboot = 15 * time.Minute // It takes into account full VM reboot and Etcd member healthy
 	pollInterval                    = 5 * time.Second
 )
 
@@ -189,20 +190,12 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual
 		c, vmA, vmB, err := setupMinimalTestEnvironment(oc, &nodeA, &nodeB)
 		o.Expect(err).To(o.BeNil(), "Expected to setup test environment without error")
 
-		dataPair := []struct {
-			vm, node string
-		}{
+		dataPair := []vmNodePair{
 			{vmA, nodeA.Name},
 			{vmB, nodeB.Name},
 		}
 
-		defer func() {
-			for _, d := range dataPair {
-				if err := services.VirshStartVM(d.vm, &c.HypervisorConfig, c.HypervisorKnownHostsPath); err != nil {
-					fmt.Fprintf(g.GinkgoWriter, "Warning: failed to restart VM %s during cleanup: %v\n", d.vm, err)
-				}
-			}
-		}()
+		defer restartVms(dataPair, c)
 
 		g.By("Simulating double node failure: stopping both nodes' VMs")
 		// First, stop all VMs
@@ -217,21 +210,130 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual
 		}
 
 		g.By("Restarting both nodes")
-		// Start all VMs
+		restartVms(dataPair, c)
+
+		g.By(fmt.Sprintf("Waiting both etcd members to become healthy (timeout: %v)", membersHealthyAfterDoubleReboot))
+		// Both nodes are expected to be healthy voting members. The order of nodes passed to the validation function does not matter.
+		validateEtcdRecoveryState(oc, etcdClientFactory,
+			&nodeA,
+			&nodeB, true, false,
+			membersHealthyAfterDoubleReboot, pollInterval)
+	})
+
+	g.It("should recover from double graceful node shutdown", func() {
+		// Note: Both nodes are gracefully shut down, then both restart
+		nodeA := peerNode
+		nodeB := targetNode
+		g.GinkgoT().Printf("Testing double node graceful shutdown for %s and %s\n", nodeA.Name, nodeB.Name)
+
+		c, vmA, vmB, err := setupMinimalTestEnvironment(oc, &nodeA, &nodeB)
+		o.Expect(err).To(o.BeNil(), "Expected to setup test environment without error")
+
+		dataPair := []vmNodePair{
+			{vmA, nodeA.Name},
+			{vmB, nodeB.Name},
+		}
+
+		defer restartVms(dataPair, c)
+
+		g.By(fmt.Sprintf("Gracefully shutting down both nodes at the same time (timeout: %v)", vmGracefulShutdownTimeout))
 		for _, d := range dataPair {
-			err := services.VirshStartVM(d.vm, &c.HypervisorConfig, c.HypervisorKnownHostsPath)
-			o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected to start VM %s (node: %s)", d.vm, d.node))
+			innerErr := services.VirshShutdownVM(d.vm, &c.HypervisorConfig, c.HypervisorKnownHostsPath)
+			o.Expect(innerErr).To(o.BeNil(), fmt.Sprintf("Expected to gracefully shutdown VM %s (node: %s)", d.vm, d.node))
 		}
-		// Wait for all to be running
+
 		for _, d := range dataPair {
-			err := services.WaitForVMState(d.vm, services.VMStateRunning, vmUngracefulShutdownTimeout, pollInterval, &c.HypervisorConfig, c.HypervisorKnownHostsPath)
-			o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected VM %s (node: %s) to start in %s timeout", d.vm, d.node, vmRestartTimeout))
+			innerErr := services.WaitForVMState(d.vm, services.VMStateShutOff, vmGracefulShutdownTimeout, pollInterval, &c.HypervisorConfig, c.HypervisorKnownHostsPath)
+			o.Expect(innerErr).To(o.BeNil(), fmt.Sprintf("Expected VM %s (node: %s) to reach shut off state", d.vm, d.node))
+		}
+
+		g.By("Restarting both nodes")
+		restartVms(dataPair, c)
+
+		g.By(fmt.Sprintf("Waiting both etcd members to become healthy (timeout: %v)", membersHealthyAfterDoubleReboot))
+		// Both nodes are expected to be healthy voting members. The order of nodes passed to the validation function does not matter.
+		validateEtcdRecoveryState(oc, etcdClientFactory,
+			&nodeA,
+			&nodeB, true, false,
+			membersHealthyAfterDoubleReboot, pollInterval)
+	})
+
+	g.It("should recover from sequential graceful node shutdowns", func() {
+		// Note: First node is gracefully shut down, then the second, then both restart
+		firstToShutdown := peerNode
+		secondToShutdown := targetNode
+		g.GinkgoT().Printf("Testing sequential graceful shutdowns: first %s, then %s\n",
+			firstToShutdown.Name, secondToShutdown.Name)
+
+		c, vmFirstToShutdown, vmSecondToShutdown, err := setupMinimalTestEnvironment(oc, &firstToShutdown, &secondToShutdown)
+		o.Expect(err).To(o.BeNil(), "Expected to setup test environment without error")
+
+		dataPair := []vmNodePair{
+			{vmFirstToShutdown, firstToShutdown.Name},
+			{vmSecondToShutdown, secondToShutdown.Name},
+		}
+
+		defer restartVms(dataPair, c)
+
+		g.By(fmt.Sprintf("Gracefully shutting down first node: %s", firstToShutdown.Name))
+
+		err = vmShutdownAndWait(VMShutdownModeGraceful, vmFirstToShutdown, c)
+		o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected VM %s to reach shut off state", vmFirstToShutdown))
+
+		g.By(fmt.Sprintf("Gracefully shutting down second node: %s", secondToShutdown.Name))
+		err = vmShutdownAndWait(VMShutdownModeGraceful, vmSecondToShutdown, c)
+		o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected VM %s to reach shut off state", vmSecondToShutdown))
+
+		g.By("Restarting both nodes")
+		restartVms(dataPair, c)
+
+		g.By(fmt.Sprintf("Waiting both etcd members to become healthy (timeout: %v)", membersHealthyAfterDoubleReboot))
+		// Both nodes are expected to be healthy voting members. The order of nodes passed to the validation function does not matter.
+		validateEtcdRecoveryState(oc, etcdClientFactory,
+			&firstToShutdown,
+			&secondToShutdown, true, false,
+			membersHealthyAfterDoubleReboot, pollInterval)
+	})
+
+	g.It("should recover from graceful shutdown followed by ungraceful node failure", func() {
+		// Note: First node is gracefully shut down, then the survived node fails ungracefully
+		firstToShutdown := targetNode
+		secondToShutdown := peerNode
+		g.GinkgoT().Printf("Randomly selected %s to shutdown gracefully and %s to survive, then fail ungracefully\n",
+			firstToShutdown.Name, secondToShutdown.Name)
+
+		c, vmFirstToShutdown, vmSecondToShutdown, err := setupMinimalTestEnvironment(oc, &firstToShutdown, &secondToShutdown)
+		o.Expect(err).To(o.BeNil(), "Expected to setup test environment without error")
+
+		dataPair := []vmNodePair{
+			{vmFirstToShutdown, firstToShutdown.Name},
+			{vmSecondToShutdown, secondToShutdown.Name},
 		}
 
+		defer restartVms(dataPair, c)
+
+		g.By(fmt.Sprintf("Gracefully shutting down VM %s (node: %s)", vmFirstToShutdown, firstToShutdown.Name))
+		err = vmShutdownAndWait(VMShutdownModeGraceful, vmFirstToShutdown, c)
+		o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected VM %s to reach shut off state", vmFirstToShutdown))
+
+		g.By(fmt.Sprintf("Waiting for %s to recover the etcd cluster standalone (timeout: %v)", secondToShutdown.Name, memberIsLeaderTimeout))
+		validateEtcdRecoveryState(oc, etcdClientFactory,
+			&secondToShutdown,
+			&firstToShutdown, false, true, // expected started == false, learner == true
+			memberIsLeaderTimeout, pollInterval)
+
+		g.By(fmt.Sprintf("Ungracefully shutting down VM %s (node: %s)", vmSecondToShutdown, secondToShutdown.Name))
+		err = vmShutdownAndWait(VMShutdownModeUngraceful, vmSecondToShutdown, c)
+		o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected VM %s to reach shut off state", vmSecondToShutdown))
+
+		g.By("Restarting both nodes")
+		restartVms(dataPair, c)
+
 		g.By(fmt.Sprintf("Waiting both etcd members to become healthy (timeout: %v)", membersHealthyAfterDoubleReboot))
+		// Both nodes are expected to be healthy voting members. The order of nodes passed to the validation function does not matter.
 		validateEtcdRecoveryState(oc, etcdClientFactory,
-			&nodeA, // member on node A considered leader, hence started == true, learner == false
-			&nodeB, true, false, // member on node B expected started == true, learner == false
+			&firstToShutdown,
+			&secondToShutdown, true, false,
 			membersHealthyAfterDoubleReboot, pollInterval)
 	})
 })
@@ -336,6 +438,12 @@ func findClusterOperatorCondition(conditions []v1.ClusterOperatorStatusCondition
 	return nil
 }
 
+// validateEtcdRecoveryState polls the etcd cluster until the members match the expected state or a timeout is reached.
+//
+// This function assumes that the first node argument is always expected to be a healthy, voting member (isStarted=true, isLearner=false).
+// It validates the state of the second node argument against the provided `isTargetNodeStartedExpected` and `isTargetNodeLearnerExpected` booleans.
+//
+// When both nodes are expected to be healthy voting members, the order of the node arguments is interchangeable.
 func validateEtcdRecoveryState(
 	oc *util.CLI, e *helpers.EtcdClientFactoryImpl,
 	survivedNode, targetNode *corev1.Node,
@@ -541,3 +649,62 @@ func setupMinimalTestEnvironment(oc *util.CLI, nodeA, nodeB *corev1.Node) (c hyp
 
 	return
 }
+
+type vmNodePair struct {
+	vm, node string
+}
+
+type VMShutdownMode int
+
+const (
+	VMShutdownModeGraceful VMShutdownMode = iota + 1
+	VMShutdownModeUngraceful
+)
+
+func (sm VMShutdownMode) String() string {
+	switch sm {
+	case VMShutdownModeGraceful:
+		return "graceful VM shutdown"
+	case VMShutdownModeUngraceful:
+		return "ungraceful VM shutdown"
+	}
+	return "unknown vm shutdown mode"
+}
+
+func vmShutdownAndWait(mode VMShutdownMode, vm string, c hypervisorExtendedConfig) error {
+	var timeout time.Duration
+	var shutdownFunc func(vmName string, sshConfig *core.SSHConfig, knownHostsPath string) error
+	switch mode {
+	case VMShutdownModeGraceful:
+		timeout = vmGracefulShutdownTimeout
+		shutdownFunc = services.VirshShutdownVM
+	case VMShutdownModeUngraceful:
+		timeout = vmUngracefulShutdownTimeout
+		shutdownFunc = services.VirshDestroyVM
+	default:
+		return fmt.Errorf("unexpected VMShutdownMode: %s", mode)
+	}
+
+	g.GinkgoT().Printf("%s: vm %s (timeout: %v)\n", mode, vm, timeout)
+	err := shutdownFunc(vm, &c.HypervisorConfig, c.HypervisorKnownHostsPath)
+	if err != nil {
+		return err
+	}
+
+	return services.WaitForVMState(vm, services.VMStateShutOff, timeout, pollInterval, &c.HypervisorConfig, c.HypervisorKnownHostsPath)
+}
+
+func restartVms(dataPair []vmNodePair, c hypervisorExtendedConfig) {
+	// Start all VMs asynchronously
+	for _, d := range dataPair {
+		if err := services.VirshStartVM(d.vm, &c.HypervisorConfig, c.HypervisorKnownHostsPath); err != nil {
+			fmt.Fprintf(g.GinkgoWriter, "Warning: failed to restart VM %s during cleanup: %v\n", d.vm, err)
+		}
+	}
+
+	// Wait for all VMs to be running
+	for _, d := range dataPair {
+		err := services.WaitForVMState(d.vm, services.VMStateRunning, vmRestartTimeout, pollInterval, &c.HypervisorConfig, c.HypervisorKnownHostsPath)
+		o.Expect(err).To(o.BeNil(), fmt.Sprintf("Expected VM %s (node: %s) to start in %s timeout", d.vm, d.node, vmRestartTimeout))
+	}
+}
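
For quick reference, the per-phase expectations that the new tests assert through validateEtcdRecoveryState can be tabulated. The sketch below is a standalone summary, not code from the commit; the expectation type and phase labels are illustrative, while the boolean pairs come from the calls shown in the diff above.

package main

import "fmt"

// expectation records the (isStarted, isLearner) pair that a test passes for the
// second (target) node argument of validateEtcdRecoveryState; the first node
// argument is always expected to be a healthy voting member.
type expectation struct {
	phase     string
	isStarted bool
	isLearner bool
}

func main() {
	expectations := []expectation{
		// Mixed GNS/UGNS test, after the first node is gracefully shut down:
		// the surviving node runs etcd standalone while the shut-down member is
		// expected to be a non-started learner (started == false, learner == true).
		{"after graceful shutdown of the first node", false, true},
		// All three tests, after both nodes restart from the cold boot: both
		// members must be healthy voting members (started == true, learner == false).
		{"after cold boot of both nodes", true, false},
	}

	for _, e := range expectations {
		fmt.Printf("%-45s started=%t learner=%t\n", e.phase, e.isStarted, e.isLearner)
	}
}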
