From b88c038bf42f6df425d697dfea8ebc07bc6195a6 Mon Sep 17 00:00:00 2001 From: Robert Vasek Date: Mon, 14 Aug 2023 16:54:45 +0200 Subject: [PATCH] added automount-reconciler component This commit adds a new component automount-reconciler deployed as a separate nodeplugin container. It watches for broken mounts in /cvmfs and unmounts them. autofs will then automatically remount when the path is accessed again. --- Makefile | 4 + cmd/automount-reconciler/main.go | 66 +++++++ deployments/docker/Dockerfile | 1 + .../templates/nodeplugin-daemonset.yaml | 24 +++ deployments/helm/cvmfs-csi/values.yaml | 19 ++ go.mod | 2 +- .../cvmfs/automount/reconciler/reconciler.go | 169 ++++++++++++++++++ internal/cvmfs/node/mountutil.go | 18 +- internal/mountutils/mount.go | 38 ++++ 9 files changed, 324 insertions(+), 17 deletions(-) create mode 100644 cmd/automount-reconciler/main.go create mode 100644 internal/cvmfs/automount/reconciler/reconciler.go create mode 100644 internal/mountutils/mount.go diff --git a/Makefile b/Makefile index 7ca6facd..dc2bf4b4 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,9 @@ $(BINDIR)/csi-cvmfsplugin: $(SRC) $(BINDIR)/automount-runner: $(SRC) go build $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LDFLAGS)' -o $@ ./cmd/automount-runner +$(BINDIR)/automount-reconciler: $(SRC) + go build $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LDFLAGS)' -o $@ ./cmd/automount-reconciler + $(BINDIR)/singlemount-runner: $(SRC) go build $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LDFLAGS)' -o $@ ./cmd/singlemount-runner @@ -97,6 +100,7 @@ build-cross: LDFLAGS += -extldflags "-static" build-cross: $(GOX) $(SRC) CGO_ENABLED=0 $(GOX) -parallel=$(GOX_PARALLEL) -output="$(BINDIR)/{{.OS}}-{{.Arch}}/csi-cvmfsplugin" -osarch='$(TARGETS)' $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LDFLAGS)' ./cmd/csi-cvmfsplugin CGO_ENABLED=0 $(GOX) -parallel=$(GOX_PARALLEL) -output="$(BINDIR)/{{.OS}}-{{.Arch}}/automount-runner" -osarch='$(TARGETS)' $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LDFLAGS)' 
./cmd/automount-runner + CGO_ENABLED=0 $(GOX) -parallel=$(GOX_PARALLEL) -output="$(BINDIR)/{{.OS}}-{{.Arch}}/automount-reconciler" -osarch='$(TARGETS)' $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LDFLAGS)' ./cmd/automount-reconciler CGO_ENABLED=0 $(GOX) -parallel=$(GOX_PARALLEL) -output="$(BINDIR)/{{.OS}}-{{.Arch}}/singlemount-runner" -osarch='$(TARGETS)' $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LDFLAGS)' ./cmd/singlemount-runner # ------------------------------------------------------------------------------ diff --git a/cmd/automount-reconciler/main.go b/cmd/automount-reconciler/main.go new file mode 100644 index 00000000..fbe4c1b8 --- /dev/null +++ b/cmd/automount-reconciler/main.go @@ -0,0 +1,66 @@ +// Copyright CERN. +// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package main + +import ( + "flag" + "fmt" + "os" + "time" + + "github.com/cvmfs-contrib/cvmfs-csi/internal/cvmfs/automount/reconciler" + "github.com/cvmfs-contrib/cvmfs-csi/internal/log" + cvmfsversion "github.com/cvmfs-contrib/cvmfs-csi/internal/version" + + "k8s.io/klog/v2" +) + +var ( + version = flag.Bool("version", false, "Print driver version and exit.") + period = flag.Duration("period", time.Second*30, "How often to check and reconcile autofs-managed CVMFS mounts.") +) + +func main() { + // Handle flags and initialize logging. 
+ + klog.InitFlags(nil) + if err := flag.Set("logtostderr", "true"); err != nil { + klog.Exitf("failed to set logtostderr flag: %v", err) + } + flag.Parse() + + if *version { + fmt.Println("automount-reconciler for CVMFS CSI plugin version", cvmfsversion.FullVersion()) + os.Exit(0) + } + + // Initialize and run mount-reconciler. + + log.Infof("automount-reconciler for CVMFS CSI plugin version %s", cvmfsversion.FullVersion()) + log.Infof("Command line arguments %v", os.Args) + + // Run blocking. + + err := mountreconcile.RunBlocking(&mountreconcile.Opts{ + Period: *period, + }) + if err != nil { + log.Fatalf("Failed to run mount-reconciler: %v", err) + } + + os.Exit(0) +} diff --git a/deployments/docker/Dockerfile b/deployments/docker/Dockerfile index 7d482fcb..77912afa 100644 --- a/deployments/docker/Dockerfile +++ b/deployments/docker/Dockerfile @@ -39,4 +39,5 @@ LABEL org.opencontainers.image.title="cvmfs-csi" \ COPY bin/linux-${TARGETARCH}/csi-cvmfsplugin /csi-cvmfsplugin COPY bin/linux-${TARGETARCH}/automount-runner /automount-runner +COPY bin/linux-${TARGETARCH}/automount-reconciler /automount-reconciler COPY bin/linux-${TARGETARCH}/singlemount-runner /singlemount-runner diff --git a/deployments/helm/cvmfs-csi/templates/nodeplugin-daemonset.yaml b/deployments/helm/cvmfs-csi/templates/nodeplugin-daemonset.yaml index 66ccfa63..a743ef00 100644 --- a/deployments/helm/cvmfs-csi/templates/nodeplugin-daemonset.yaml +++ b/deployments/helm/cvmfs-csi/templates/nodeplugin-daemonset.yaml @@ -131,6 +131,30 @@ spec: {{- with .Values.nodeplugin.plugin.resources }} resources: {{ toYaml . 
| nindent 12 }} {{- end }} + - name: automount-reconciler + image: {{ .Values.nodeplugin.automountReconciler.image.repository }}:{{ .Values.nodeplugin.automountReconciler.image.tag }} + command: [/automount-reconciler] + args: + - -v={{ .Values.logVerbosityLevel }} + - --period={{ .Values.automountReconcilePeriod }} + imagePullPolicy: {{ .Values.nodeplugin.automountReconciler.image.pullPolicy }} + securityContext: + privileged: true + capabilities: + add: ["SYS_ADMIN"] + allowPrivilegeEscalation: true + volumeMounts: + - name: autofs-root + mountPath: /cvmfs + mountPropagation: Bidirectional + - name: cvmfs-localcache + mountPath: /cvmfs-localcache + {{- with .Values.nodeplugin.automountReconciler.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.nodeplugin.automountReconciler.resources }} + resources: {{ toYaml . | nindent 12 }} + {{- end }} - name: singlemount image: {{ .Values.nodeplugin.singlemount.image.repository }}:{{ .Values.nodeplugin.singlemount.image.tag }} command: diff --git a/deployments/helm/cvmfs-csi/values.yaml b/deployments/helm/cvmfs-csi/values.yaml index 5f0b14f0..41bf98ba 100644 --- a/deployments/helm/cvmfs-csi/values.yaml +++ b/deployments/helm/cvmfs-csi/values.yaml @@ -88,6 +88,22 @@ nodeplugin: - name: etc-cvmfs-config-d mountPath: /etc/cvmfs/config.d + # automount-reconciler image and container resources specs. + automountReconciler: + image: + repository: registry.cern.ch/magnum/cvmfs-csi + tag: latest + pullPolicy: IfNotPresent + resources: {} + # Extra volume mounts to append to nodeplugin's + # Pod.spec.containers[name="automount-reconciler"].volumeMounts. + extraVolumeMounts: + - name: etc-cvmfs-default-conf + mountPath: /etc/cvmfs/default.local + subPath: default.local + - name: etc-cvmfs-config-d + mountPath: /etc/cvmfs/config.d + # automount-runner image and container resources specs. 
singlemount: image: @@ -238,6 +254,9 @@ cvmfsCSIPluginSocketFile: csi.sock # The directory will be created if it doesn't exist. automountHostPath: /var/cvmfs +# How often to check and reconcile autofs-managed CVMFS mounts. +automountReconcilePeriod: 30s + # Number of seconds to wait for automount daemon to start up before exiting. automountDaemonStartupTimeout: 10 # Number of seconds of idle time after which an autofs-managed CVMFS mount will diff --git a/go.mod b/go.mod index 9386dcf6..9e0c6a01 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.20 require ( github.com/container-storage-interface/spec v1.8.0 github.com/kubernetes-csi/csi-lib-utils v0.13.0 + github.com/moby/sys/mountinfo v0.6.2 google.golang.org/grpc v1.55.0 google.golang.org/protobuf v1.30.0 k8s.io/apimachinery v0.26.3 @@ -15,7 +16,6 @@ require ( require ( github.com/go-logr/logr v1.2.3 // indirect github.com/golang/protobuf v1.5.3 // indirect - github.com/moby/sys/mountinfo v0.6.2 // indirect golang.org/x/net v0.8.0 // indirect golang.org/x/sys v0.6.0 // indirect golang.org/x/text v0.8.0 // indirect diff --git a/internal/cvmfs/automount/reconciler/reconciler.go b/internal/cvmfs/automount/reconciler/reconciler.go new file mode 100644 index 00000000..18848de0 --- /dev/null +++ b/internal/cvmfs/automount/reconciler/reconciler.go @@ -0,0 +1,169 @@ +// Copyright CERN. +// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package mountreconcile + +import ( + "bytes" + "fmt" + "os" + goexec "os/exec" + "path" + "strings" + "syscall" + "time" + + "github.com/cvmfs-contrib/cvmfs-csi/internal/exec" + "github.com/cvmfs-contrib/cvmfs-csi/internal/log" + "github.com/cvmfs-contrib/cvmfs-csi/internal/mountutils" + + "github.com/moby/sys/mountinfo" +) + +const mountPathPrefix = "/cvmfs/" + +type Opts struct { + Period time.Duration +} + +func RunBlocking(o *Opts) error { + t := time.NewTicker(o.Period) + + for { + select { + case <-t.C: + log.Tracef("Reconciling /cvmfs") + if err := reconcile(); err != nil { + log.Errorf("Failed to reconcile /cvmfs: %v", err) + } + } + } +} + +// List CVMFS mounts in /cvmfs that the kernel knows about. +// We do that by listing mounts in /proc/self/mountinfo and filtering +// those where the filesystem type is "fuse" and the mountpoint is rooted in /cvmfs. +func getMountedRepositories() ([]string, error) { + cvmfsMountInfos, err := mountinfo.GetMounts(func(info *mountinfo.Info) (skip, stop bool) { + return info.FSType != "fuse" || !strings.HasPrefix(info.Mountpoint, mountPathPrefix), + false + }) + if err != nil { + return nil, err + } + + repositories := make([]string, len(cvmfsMountInfos)) + + for i := range cvmfsMountInfos { + repositories[i] = cvmfsMountInfos[i].Mountpoint[len(mountPathPrefix):] + } + + return repositories, nil +} + +func doCvmfsTalk(repo, command string) ([]byte, error) { + return exec.CombinedOutput( + goexec.Command( + "cvmfs_talk", + "-i", repo, + command, + ), + ) +} + +// repoNeedsUnmount checks if a /cvmfs/<repo> mountpoint is healthy. +// Because mounts under /cvmfs are managed by autofs, we cannot check +// them directly (with a stat() for example), as this would trigger +// autofs's unmount timeout reset. Instead, we use cvmfs_talk to probe +// for CVMFS client, and only if this fails with "Connection refused", +// we use stat("/cvmfs/<repo>") to check the mount. 
+func repoNeedsUnmount(repo string) (bool, error) { + out, err := doCvmfsTalk(repo, "mountpoint") + if err == nil { + if bytes.HasPrefix(out, []byte(mountPathPrefix)) { + return false, nil + } + + // The mountpoint is outside of /cvmfs? + // Normally this shouldn't happen, report an error. + return false, fmt.Errorf( + "repository is mounted at an unexpected location \"%s\", expected /cvmfs", out) + } + + // The CVMFS client exited unexpectedly, and the watchdog + // didn't remount it automatically. + const cvmfsErrConnRefused = "(111 - Connection refused)\x0A" + const cvmfsErrClientNotRunning = "Seems like CernVM-FS is not running" + + if bytes.HasSuffix(out, []byte(cvmfsErrConnRefused)) || + bytes.HasPrefix(out, []byte(cvmfsErrClientNotRunning)) { + // It seems that the CVMFS client exited. + // Use stat syscall to check for ENOTCONN, i.e. the mount is corrupted, + // confirming what cvmfs_talk returned. + + _, err := os.Stat(path.Join(mountPathPrefix, repo)) + if err != nil { + if err.(*os.PathError).Err == syscall.ENOTCONN { + return true, nil + } + + // It's something else. + return false, fmt.Errorf("unexpected error from stat: %v", err) + } + + // stat should have failed! Fall through and fail. + } + + // If we got here, the error reported by cvmfs_talk + // is something else and we should fail too. + return false, fmt.Errorf("failed to talk to CVMFS client (%v): %s", err, out) +} + +func reconcile() error { + // List mounted CVMFS repositories in /cvmfs. + + mountedRepos, err := getMountedRepositories() + if err != nil { + return err + } + + log.Tracef("CVMFS mounts in /cvmfs: %v", mountedRepos) + + // Check each mountpoint we found above. In case it's corrupted, + // we unmount it. autofs will then take care of automatically remounting + // it when the path is accessed. 
+ + for _, repo := range mountedRepos { + needsUnmount, err := repoNeedsUnmount(repo) + mountpoint := path.Join(mountPathPrefix, repo) + + if err != nil { + log.Errorf("Failed to reconcile %s: %v", mountpoint, err) + continue + } + + if needsUnmount { + log.Infof("%s is corrupted, unmounting", mountpoint) + + if err := mountutils.Unmount(mountpoint); err != nil { + log.Errorf("Failed to unmount %s during mount reconciliation: %v", mountpoint, err) + continue + } + } + } + + return nil +} diff --git a/internal/cvmfs/node/mountutil.go b/internal/cvmfs/node/mountutil.go index fa76d363..c4d3437b 100644 --- a/internal/cvmfs/node/mountutil.go +++ b/internal/cvmfs/node/mountutil.go @@ -17,10 +17,10 @@ package node import ( - "bytes" goexec "os/exec" "github.com/cvmfs-contrib/cvmfs-csi/internal/exec" + "github.com/cvmfs-contrib/cvmfs-csi/internal/mountutils" ) func bindMount(from, to string) error { @@ -52,22 +52,8 @@ func slaveRecursiveBind(from, to string) error { return err } -func unmount(mountpoint string, extraArgs ...string) error { - out, err := exec.CombinedOutput(goexec.Command("umount", append(extraArgs, mountpoint)...)) - if err != nil { - // There are no well-defined exit codes for cases of "not mounted" - // and "doesn't exist". We need to check the output. - if bytes.HasSuffix(out, []byte(": not mounted")) || - bytes.Contains(out, []byte("No such file or directory")) { - return nil - } - } - - return err -} - func recursiveUnmount(mountpoint string) error { // We need recursive unmount because there are live mounts inside the bindmount. // Unmounting only the upper autofs mount would result in EBUSY. - return unmount(mountpoint, "--recursive") + return mountutils.Unmount(mountpoint, "--recursive") } diff --git a/internal/mountutils/mount.go b/internal/mountutils/mount.go new file mode 100644 index 00000000..c0eaa2ab --- /dev/null +++ b/internal/mountutils/mount.go @@ -0,0 +1,38 @@ +// Copyright CERN. 
+// +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package mountutils + +import ( + "bytes" + goexec "os/exec" + + "github.com/cvmfs-contrib/cvmfs-csi/internal/exec" +) + +func Unmount(mountpoint string, extraArgs ...string) error { + out, err := exec.CombinedOutput(goexec.Command("umount", append(extraArgs, mountpoint)...)) + if err != nil { + // There are no well-defined exit codes for cases of "not mounted" + // and "doesn't exist". We need to check the output. + if bytes.HasSuffix(out, []byte(": not mounted")) || + bytes.Contains(out, []byte("No such file or directory")) { + return nil + } + } + + return err +}