From 2fd80c0fb62baf866f6bbed2cc163842a04afe8d Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Fri, 31 Jan 2020 10:28:19 -0800 Subject: [PATCH 1/4] Test that Machine#Wait() with context cancellation The method should guarantee that the underlying process is dead, but it doesn't. Signed-off-by: Kazuyoshi Kato --- machine_test.go | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/machine_test.go b/machine_test.go index 1d39061f..fa6cf3b5 100644 --- a/machine_test.go +++ b/machine_test.go @@ -1114,6 +1114,54 @@ func TestWaitWithKill(t *testing.T) { require.Error(t, err, "Firecracker was killed and it must be reported") } +func isProcessAlive(pid int) (bool, error) { + // Using kill(2) with signal=0 to check the existence of the process, + // because os.FindProcess always returns a process, regardless of whether the process is + // alive or not. + // https://golang.org/pkg/os/#FindProcess + err := syscall.Kill(pid, syscall.Signal(0)) + if err != nil { + if errno, ok := err.(syscall.Errno); ok { + if errno == syscall.ESRCH { + return false, nil + } + } + } + return true, nil +} + +func TestWaitWithCancel(t *testing.T) { + fctesting.RequiresRoot(t) + ctx, cancel := context.WithCancel(context.Background()) + + socketPath := filepath.Join(testDataPath, t.Name()) + defer os.Remove(socketPath) + + cfg := createValidConfig(t, socketPath) + cmd := VMCommandBuilder{}. + WithSocketPath(cfg.SocketPath). + WithBin(getFirecrackerBinaryPath()). + Build(ctx) + m, err := NewMachine(ctx, cfg, WithProcessRunner(cmd)) + require.NoError(t, err) + + err = m.Start(ctx) + require.NoError(t, err) + + pid, err := m.PID() + require.NoError(t, err) + + go func() { + cancel() + }() + + err = m.Wait(context.Background()) + require.Error(t, err, "Firecracker was killed and it must be reported") + + alive, err := isProcessAlive(pid) + require.False(t, alive, "The process must not be there") +} + func TestWaitWithInvalidBinary(t *testing.T) { ctx := context.Background() From 8dba8bd3d84288ae9a3e344e64a936d3e74fb136 Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Fri, 31 Jan 2020 10:45:09 -0800 Subject: [PATCH 2/4] Don't close m.exitCh just after context cancelletion The context is used to let the SDK kill its underlying Firecracker process. Closing the channel has to be happening after the actual termination of the process. Signed-off-by: Kazuyoshi Kato --- machine.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/machine.go b/machine.go index 333d4e3d..266c95a2 100644 --- a/machine.go +++ b/machine.go @@ -516,14 +516,19 @@ func (m *Machine) startVMM(ctx context.Context) error { return err } + + // This goroutine is used to kill the process by context cancelletion, + // but doesn't tell anyone about that. go func() { - select { - case <-ctx.Done(): - m.fatalErr = ctx.Err() - case err := <-errCh: - m.fatalErr = err - } + <-ctx.Done() + m.stopVMM() + }() + // This goroutine is used to tell clients that the process is stopped + // (gracefully or not). + go func() { + m.fatalErr = <-errCh + m.logger.Debugf("closing the exitCh %v", m.fatalErr) close(m.exitCh) }() From ec7ef18ae17d3b52aecb26b23acca3708ec4071b Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Fri, 31 Jan 2020 11:08:46 -0800 Subject: [PATCH 3/4] Combine multiple test cases around Wait() Regardless of how the machine is stopped, Wait() should gurantee that the process is not there. Signed-off-by: Kazuyoshi Kato --- machine_test.go | 117 +++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/machine_test.go b/machine_test.go index fa6cf3b5..0b5e4dee 100644 --- a/machine_test.go +++ b/machine_test.go @@ -1081,37 +1081,76 @@ func TestCaptureFifoToFile_leak(t *testing.T) { assert.Contains(t, loggerBuffer.String(), `file already closed`, "log") } -func TestWaitWithKill(t *testing.T) { +// Replace filesystem-unsafe characters (such as /) which are often seen in Go's test names +var fsSafeTestName = strings.NewReplacer("/", "_") + +func TestWait(t *testing.T) { fctesting.RequiresRoot(t) - ctx := context.Background() - socketPath := filepath.Join(testDataPath, t.Name()) - defer os.Remove(socketPath) + cases := []struct { + name string + stop func(m *Machine) + }{ + { + name: "StopVMM", + stop: func(m *Machine) { + err := m.StopVMM() + require.NoError(t, err) + }, + }, + { + name: "Kill", + stop: func(m *Machine) { + pid, err := m.PID() + require.NoError(t, err) + + process, err := os.FindProcess(pid) + err = process.Kill() + require.NoError(t, err) + }, + }, + { + name: "Context Cancel", + }, + } - cfg := createValidConfig(t, socketPath) - cmd := VMCommandBuilder{}. - WithSocketPath(cfg.SocketPath). - WithBin(getFirecrackerBinaryPath()). - Build(ctx) - m, err := NewMachine(ctx, cfg, WithProcessRunner(cmd)) - require.NoError(t, err) + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + ctx := context.Background() + vmContext, vmCancel := context.WithCancel(context.Background()) - err = m.Start(ctx) - require.NoError(t, err) + socketPath := filepath.Join(testDataPath, fsSafeTestName.Replace(t.Name())) + defer os.Remove(socketPath) - go func() { - pid, err := m.PID() - require.NoError(t, err) + cfg := createValidConfig(t, socketPath) + cmd := VMCommandBuilder{}. + WithSocketPath(cfg.SocketPath). + WithBin(getFirecrackerBinaryPath()). + Build(vmContext) + m, err := NewMachine(ctx, cfg, WithProcessRunner(cmd)) + require.NoError(t, err) - process, err := os.FindProcess(pid) - require.NoError(t, err) + err = m.Start(ctx) + require.NoError(t, err) - err = process.Kill() - require.NoError(t, err) - }() + pid, err := m.PID() + require.NoError(t, err) - err = m.Wait(ctx) - require.Error(t, err, "Firecracker was killed and it must be reported") + go func() { + if c.stop != nil { + c.stop(m) + } else { + vmCancel() + } + }() + + err = m.Wait(ctx) + require.Error(t, err, "Firecracker was killed and it must be reported") + + alive, err := isProcessAlive(pid) + require.False(t, alive, "pid=%d is still there", pid) + }) + } } func isProcessAlive(pid int) (bool, error) { @@ -1130,38 +1169,6 @@ func isProcessAlive(pid int) (bool, error) { return true, nil } -func TestWaitWithCancel(t *testing.T) { - fctesting.RequiresRoot(t) - ctx, cancel := context.WithCancel(context.Background()) - - socketPath := filepath.Join(testDataPath, t.Name()) - defer os.Remove(socketPath) - - cfg := createValidConfig(t, socketPath) - cmd := VMCommandBuilder{}. - WithSocketPath(cfg.SocketPath). - WithBin(getFirecrackerBinaryPath()). - Build(ctx) - m, err := NewMachine(ctx, cfg, WithProcessRunner(cmd)) - require.NoError(t, err) - - err = m.Start(ctx) - require.NoError(t, err) - - pid, err := m.PID() - require.NoError(t, err) - - go func() { - cancel() - }() - - err = m.Wait(context.Background()) - require.Error(t, err, "Firecracker was killed and it must be reported") - - alive, err := isProcessAlive(pid) - require.False(t, alive, "The process must not be there") -} - func TestWaitWithInvalidBinary(t *testing.T) { ctx := context.Background() From 25ecff7d0a0a34aae094c1fd60d73b1653dd675f Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Mon, 3 Feb 2020 15:51:01 -0800 Subject: [PATCH 4/4] Use Start's context correctly VMCommandBuilder#Build()'s context directly controls exec.Cmd. To test the goroutines inside Machine#startVMM(), we cannot use VMCommandBuilder. Signed-off-by: Kazuyoshi Kato --- machine.go | 10 ++++++-- machine_test.go | 67 ++++++++++++++++++++++++++----------------------- 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/machine.go b/machine.go index 266c95a2..a0eb3128 100644 --- a/machine.go +++ b/machine.go @@ -338,7 +338,10 @@ func NewMachine(ctx context.Context, cfg Config, opts ...Opt) (*Machine, error) return m, nil } -// Start will iterate through the handler list and call each handler. If an +// Start actually start a Firecracker microVM. +// The context must not be cancelled while the microVM is running. +// +// It will iterate through the handler list and call each handler. If an // error occurred during handler execution, that error will be returned. If the // handlers succeed, then this will start the VMM instance. // Start may only be called once per Machine. Subsequent calls will return @@ -521,7 +524,10 @@ func (m *Machine) startVMM(ctx context.Context) error { // but doesn't tell anyone about that. go func() { <-ctx.Done() - m.stopVMM() + err := m.stopVMM() + if err != nil { + m.logger.WithError(err).Errorf("failed to stop vm %q", m.Cfg.VMID) + } }() // This goroutine is used to tell clients that the process is stopped diff --git a/machine_test.go b/machine_test.go index 0b5e4dee..2ee7945d 100644 --- a/machine_test.go +++ b/machine_test.go @@ -1089,18 +1089,18 @@ func TestWait(t *testing.T) { cases := []struct { name string - stop func(m *Machine) + stop func(m *Machine, cancel context.CancelFunc) }{ { name: "StopVMM", - stop: func(m *Machine) { + stop: func(m *Machine, _ context.CancelFunc) { err := m.StopVMM() require.NoError(t, err) }, }, { name: "Kill", - stop: func(m *Machine) { + stop: func(m *Machine, cancel context.CancelFunc) { pid, err := m.PID() require.NoError(t, err) @@ -1111,6 +1111,17 @@ func TestWait(t *testing.T) { }, { name: "Context Cancel", + stop: func(m *Machine, cancel context.CancelFunc) { + cancel() + }, + }, + { + name: "StopVMM + Context Cancel", + stop: func(m *Machine, cancel context.CancelFunc) { + m.StopVMM() + time.Sleep(1 * time.Second) + cancel() + }, }, } @@ -1123,50 +1134,42 @@ func TestWait(t *testing.T) { defer os.Remove(socketPath) cfg := createValidConfig(t, socketPath) - cmd := VMCommandBuilder{}. - WithSocketPath(cfg.SocketPath). - WithBin(getFirecrackerBinaryPath()). - Build(vmContext) - m, err := NewMachine(ctx, cfg, WithProcessRunner(cmd)) + m, err := NewMachine(ctx, cfg, func(m *Machine) { + // Rewriting m.cmd partially wouldn't work since Cmd has + // some unexported members + args := m.cmd.Args[1:] + m.cmd = exec.Command(getFirecrackerBinaryPath(), args...) + }) require.NoError(t, err) - err = m.Start(ctx) + err = m.Start(vmContext) require.NoError(t, err) pid, err := m.PID() require.NoError(t, err) + var wg sync.WaitGroup + wg.Add(1) go func() { - if c.stop != nil { - c.stop(m) - } else { - vmCancel() - } + defer wg.Done() + c.stop(m, vmCancel) }() err = m.Wait(ctx) require.Error(t, err, "Firecracker was killed and it must be reported") + t.Logf("err = %v", err) - alive, err := isProcessAlive(pid) - require.False(t, alive, "pid=%d is still there", pid) - }) - } -} + proc, err := os.FindProcess(pid) + // Having an error here doesn't mean the process is not there. + // In fact it won't be non-nil on Unix systems + require.NoError(t, err) -func isProcessAlive(pid int) (bool, error) { - // Using kill(2) with signal=0 to check the existence of the process, - // because os.FindProcess always returns a process, regardless of whether the process is - // alive or not. - // https://golang.org/pkg/os/#FindProcess - err := syscall.Kill(pid, syscall.Signal(0)) - if err != nil { - if errno, ok := err.(syscall.Errno); ok { - if errno == syscall.ESRCH { - return false, nil - } - } + err = proc.Signal(syscall.Signal(0)) + require.Equal(t, "os: process already finished", err.Error()) + + wg.Wait() + }) } - return true, nil } func TestWaitWithInvalidBinary(t *testing.T) {