@@ -54,10 +54,6 @@ const (
5454 // MountPrefix is the annotation prefix for mount hints.
5555 MountPrefix = "gvisor.dev/spec/mount"
5656
57- // ChildContainersDir is the directory where child container root
58- // filesystems are mounted.
59- ChildContainersDir = "/__runsc_containers__"
60-
6157 // Filesystems that runsc supports.
6258 bind = "bind"
6359 devpts = "devpts"
@@ -256,10 +252,10 @@ func subtargets(root string, mnts []specs.Mount) []string {
256252
257253// setExecutablePath sets the procArgs.Filename by searching the PATH for an
258254// executable matching the procArgs.Argv[0].
259- func setExecutablePath (ctx context.Context , mns * fs. MountNamespace , procArgs * kernel.CreateProcessArgs ) error {
255+ func setExecutablePath (ctx context.Context , procArgs * kernel.CreateProcessArgs ) error {
260256 paths := fs .GetPath (procArgs .Envv )
261257 exe := procArgs .Argv [0 ]
262- f , err := mns .ResolveExecutablePath (ctx , procArgs .WorkingDirectory , exe , paths )
258+ f , err := procArgs . MountNamespace .ResolveExecutablePath (ctx , procArgs .WorkingDirectory , exe , paths )
263259 if err != nil {
264260 return fmt .Errorf ("searching for executable %q, cwd: %q, $PATH=%q: %v" , exe , procArgs .WorkingDirectory , strings .Join (paths , ":" ), err )
265261 }
@@ -514,11 +510,16 @@ func (c *containerMounter) setupFS(ctx context.Context, conf *Config, procArgs *
514510
515511 // If this is the root container, we also need to setup the root mount
516512 // namespace.
517- mns := c .k .RootMountNamespace ()
518- if mns == nil {
513+ rootMNS := c .k .RootMountNamespace ()
514+ if rootMNS == nil {
519515 // Setup the root container.
520- if err := c .setupRootContainer (ctx , rootCtx , conf , func (mns * fs.MountNamespace ) {
521- c .k .SetRootMountNamespace (mns )
516+ if err := c .setupRootContainer (ctx , rootCtx , conf , func (rootMNS * fs.MountNamespace ) {
517+ // The callback to setupRootContainer inherits a
518+ // reference on the rootMNS, so we don't need to take
519+ // an additional reference here.
520+ procArgs .MountNamespace = rootMNS
521+ procArgs .Root = rootMNS .Root ()
522+ c .k .SetRootMountNamespace (rootMNS )
522523 }); err != nil {
523524 return err
524525 }
@@ -527,54 +528,26 @@ func (c *containerMounter) setupFS(ctx context.Context, conf *Config, procArgs *
527528
528529 // Setup a child container.
529530 log .Infof ("Creating new process in child container." )
530- globalRoot := mns .Root ()
531- defer globalRoot .DecRef ()
532-
533- // Create mount point for the container's rootfs.
534- maxTraversals := uint (0 )
535- contDir , err := mns .FindInode (ctx , globalRoot , nil , ChildContainersDir , & maxTraversals )
536- if err != nil {
537- return fmt .Errorf ("couldn't find child container dir %q: %v" , ChildContainersDir , err )
538- }
539- if err := contDir .CreateDirectory (ctx , globalRoot , c .cid , fs .FilePermsFromMode (0755 )); err != nil {
540- return fmt .Errorf ("create directory %q: %v" , c .cid , err )
541- }
542- containerRoot , err := contDir .Walk (ctx , globalRoot , c .cid )
543- if err != nil {
544- return fmt .Errorf ("walk to %q failed: %v" , c .cid , err )
545- }
546- defer containerRoot .DecRef ()
547531
548- // Create the container's root filesystem mount.
532+ // Create a new root inode and mount namespace for the container.
549533 rootInode , err := c .createRootMount (rootCtx , conf )
550534 if err != nil {
551535 return fmt .Errorf ("creating filesystem for container: %v" , err )
552536 }
553-
554- // Mount the container's root filesystem to the newly created mount point.
555- if err := mns .Mount (ctx , containerRoot , rootInode ); err != nil {
556- return fmt .Errorf ("mount container root: %v" , err )
557- }
558-
559- // We have to re-walk to the dirent to find the mounted directory. The old
560- // dirent is invalid at this point.
561- containerRoot , err = contDir .Walk (ctx , globalRoot , c .cid )
537+ mns , err := fs .NewMountNamespace (rootCtx , rootInode )
562538 if err != nil {
563- return fmt .Errorf ("find container mount point %q : %v" , c . cid , err )
539+ return fmt .Errorf ("creating new mount namespace for container : %v" , err )
564540 }
565- cu := specutils .MakeCleanup (func () { containerRoot .DecRef () })
566- defer cu .Clean ()
567-
568- log .Infof ("Mounted child's root fs to %q" , filepath .Join (ChildContainersDir , c .cid ))
569541
570542 // Set process root here, so 'rootCtx.Value(CtxRoot)' will return it.
571- procArgs .Root = containerRoot
543+ // This will also donate a reference to procArgs, as required.
544+ procArgs .MountNamespace = mns
545+ procArgs .Root = mns .Root ()
572546
573547 // Mount all submounts.
574- if err := c .mountSubmounts (rootCtx , conf , mns , containerRoot ); err != nil {
548+ if err := c .mountSubmounts (rootCtx , conf , mns , procArgs . Root ); err != nil {
575549 return err
576550 }
577- cu .Release ()
578551 return c .checkDispenser ()
579552}
580553
@@ -585,75 +558,11 @@ func (c *containerMounter) checkDispenser() error {
585558 return nil
586559}
587560
588- // destroyContainerFS cleans up the filesystem by unmounting all mounts for the
589- // given container and deleting the container root directory.
590- func destroyContainerFS (ctx context.Context , cid string , k * kernel.Kernel ) error {
591- defer func () {
592- // Flushing dirent references triggers many async close
593- // operations. We must wait for those to complete before
594- // returning, otherwise the caller may kill the gofer before
595- // they complete, causing a cascade of failing RPCs.
596- //
597- // This must take place in the first deferred function, so that
598- // it runs after all the other deferred DecRef() calls in this
599- // function.
600- log .Infof ("Waiting for async filesystem operations to complete" )
601- fs .AsyncBarrier ()
602- }()
603-
604- // First get a reference to the container root directory.
605- mns := k .RootMountNamespace ()
606- mnsRoot := mns .Root ()
607- defer mnsRoot .DecRef ()
608- containerRoot := path .Join (ChildContainersDir , cid )
609- maxTraversals := uint (0 )
610- containerRootDirent , err := mns .FindInode (ctx , mnsRoot , nil , containerRoot , & maxTraversals )
611- if err == syserror .ENOENT {
612- // Container must have been destroyed already. That's fine.
613- return nil
614- }
615- if err != nil {
616- return fmt .Errorf ("finding container root directory %q: %v" , containerRoot , err )
617- }
618- defer containerRootDirent .DecRef ()
619-
620- // Iterate through all submounts and unmount them. We unmount lazily by
621- // setting detach=true, so we can unmount in any order.
622- mnt := mns .FindMount (containerRootDirent )
623- for _ , m := range mns .AllMountsUnder (mnt ) {
624- root := m .Root ()
625- defer root .DecRef ()
626-
627- // Do a best-effort unmount by flushing the refs and unmount
628- // with "detach only = true". Unmount returns EINVAL when the mount point
629- // doesn't exist, i.e. it has already been unmounted.
630- log .Debugf ("Unmounting container mount %q" , root .BaseName ())
631- root .Inode .MountSource .FlushDirentRefs ()
632- if err := mns .Unmount (ctx , root , true /* detach only */ ); err != nil && err != syserror .EINVAL {
633- return fmt .Errorf ("unmounting container mount %q: %v" , root .BaseName (), err )
634- }
635- }
636-
637- // Get a reference to the parent directory and remove the root
638- // container directory.
639- maxTraversals = 0
640- containersDirDirent , err := mns .FindInode (ctx , mnsRoot , nil , ChildContainersDir , & maxTraversals )
641- if err != nil {
642- return fmt .Errorf ("finding containers directory %q: %v" , ChildContainersDir , err )
643- }
644- defer containersDirDirent .DecRef ()
645- log .Debugf ("Deleting container root %q" , containerRoot )
646- if err := containersDirDirent .RemoveDirectory (ctx , mnsRoot , cid ); err != nil {
647- return fmt .Errorf ("removing directory %q: %v" , containerRoot , err )
648- }
649-
650- return nil
651- }
652-
653561// setupRootContainer creates a mount namespace containing the root filesystem
654562// and all mounts. 'rootCtx' is used to walk directories to find mount points.
655- // 'setMountNS' is called after namespace is created. It must set the mount NS
656- // to 'rootCtx'.
563+ // The 'setMountNS' callback is called after the mount namespace is created and
564+ // will get a reference on that namespace. The callback must ensure that the
565+ // rootCtx has the provided mount namespace.
657566func (c * containerMounter ) setupRootContainer (userCtx context.Context , rootCtx context.Context , conf * Config , setMountNS func (* fs.MountNamespace )) error {
658567 for _ , hint := range c .hints .mounts {
659568 log .Infof ("Mounting master of shared mount %q from %q type %q" , hint .name , hint .mount .Source , hint .mount .Type )
@@ -664,13 +573,6 @@ func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx c
664573 hint .root = inode
665574 }
666575
667- // Create a tmpfs mount where we create and mount a root filesystem for
668- // each child container.
669- c .mounts = append (c .mounts , specs.Mount {
670- Type : tmpfs ,
671- Destination : ChildContainersDir ,
672- })
673-
674576 rootInode , err := c .createRootMount (rootCtx , conf )
675577 if err != nil {
676578 return fmt .Errorf ("creating root mount: %v" , err )
0 commit comments