@@ -600,6 +600,8 @@ def git_checkout(
600600 commit : Optional [str ],
601601 ssh_key_file : Optional [Path ],
602602 ssh_known_hosts_file : Optional [Path ],
603+ efficient_clone : bool = False ,
604+ sparse_dirs : Optional [str ] = None ,
603605):
604606 env = {
605607 # abort if transfer speed is lower than 1kB/s for 1 minute
@@ -636,22 +638,43 @@ def git_checkout(
636638 args = [
637639 "git" ,
638640 "clone" ,
641+ ]
642+
643+ if efficient_clone :
644+ # Use blobless clone for faster initial clone
645+ # This fetches commit and tree objects but not file contents
646+ args .extend (["--filter=blob:none" ])
647+ # Use shallow clone with depth 1 for minimal history
648+ args .extend (["--depth=1" ])
649+ # Skip the initial checkout; the final checkout below populates the working tree (sparse if sparse_dirs is set)
650+ args .extend (["--no-checkout" ])
651+ elif sparse_dirs :
652+ # For sparse checkout without efficient clone, still skip initial checkout
653+ # so we can set up sparse checkout before checking out files
654+ args .extend (["--no-checkout" ])
655+
656+ args .extend ([
639657 base_repo if base_repo else head_repo ,
640658 destination_path ,
641- ]
659+ ])
642660
643661 retry_required_command (b"vcs" , args , extra_env = env )
644662
645663 if base_ref :
646- args = ["git" , "fetch" , "origin" , base_ref ]
664+ args = ["git" , "fetch" ]
665+ if efficient_clone :
666+ # For shallow clones, we need to deepen to fetch more history
667+ args .extend (["--depth=100" ])
668+ args .extend (["origin" , base_ref ])
647669
648670 retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
649671
650672 # Create local branch so that taskgraph is able to compute differences
651673 # between the head branch and the base one, if needed
652- args = ["git" , "checkout" , base_ref ]
653-
654- retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
674+ if not efficient_clone and not sparse_dirs :
675+ # Only checkout if we didn't use --no-checkout initially
676+ args = ["git" , "checkout" , base_ref ]
677+ retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
655678
656679 # When commits are force-pushed (like on a testing branch), base_rev doesn't
657680 # exist on base_ref. Fetching it allows taskgraph to compute differences
@@ -660,7 +683,11 @@ def git_checkout(
660683 # Unlike base_ref just above, there is no need to checkout the revision:
661684 # it's immediately available after the fetch.
662685 if base_rev and base_rev != NULL_REVISION :
663- args = ["git" , "fetch" , "origin" , base_rev ]
686+ args = ["git" , "fetch" ]
687+ if efficient_clone :
688+ # For shallow clones, we need to deepen to fetch more history
689+ args .extend (["--depth=100" ])
690+ args .extend (["origin" , base_rev ])
664691
665692 retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
666693
@@ -671,28 +698,44 @@ def git_checkout(
671698 # in not having a tag, or worse: having an outdated version of one.
672699 # `--force` is needed to be able to update an existing tag.
673700 if ref and base_repo == head_repo :
674- args = [
675- "git" ,
676- "fetch" ,
677- "--tags" ,
678- "--force" ,
679- base_repo ,
680- ref ,
681- ]
701+ args = ["git" , "fetch" ]
702+ if efficient_clone :
703+ # For shallow clones, we need to deepen to fetch more history
704+ args .extend (["--depth=100" ])
705+ args .extend (["--tags" , "--force" , base_repo , ref ])
682706
683707 retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
684708
685709 # If a ref isn't provided, we fetch all refs from head_repo, which may be slow
686- args = [
687- "git" ,
688- "fetch" ,
689- "--no-tags" ,
690- head_repo ,
691- ref if ref else "+refs/heads/*:refs/remotes/work/*" ,
692- ]
710+ args = ["git" , "fetch" ]
711+ if efficient_clone :
712+ # For shallow clones, we need to deepen to fetch more history
713+ args . extend ([ "--depth=100" ])
714+ # With blobless clones, we only fetch the blobs we need
715+ args . extend ([ "--filter=blob:none" ])
716+ args . extend ([ "--no-tags" , head_repo , ref if ref else "+refs/heads/*:refs/remotes/work/*" ])
693717
694718 retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
695719
720+ if sparse_dirs :
721+ # When sparse directories/files are specified, set up sparse checkout
722+ # The sparse_dirs should be a colon-separated list of directories or files
723+ #
724+ # Note: Git's sparse-checkout behavior in cone mode (default since Git 2.37):
725+ # - Root-level files: always checked out in cone mode, whether or not they are listed
726+ # - Files in subdirectories: Entire parent directory is included
727+ # - Directories: All contents included
728+
729+ # Enable sparse checkout (cone mode is default since Git 2.37)
730+ args = ["git" , "sparse-checkout" , "init" ]
731+ run_required_command (b"vcs" , args , cwd = destination_path )
732+
733+ # Set the sparse entries
734+ entries = sparse_dirs .split (":" )
735+ args = ["git" , "sparse-checkout" , "set" ] + entries
736+ run_required_command (b"vcs" , args , cwd = destination_path )
737+
738+ # Now do the actual checkout
696739 args = [
697740 "git" ,
698741 "checkout" ,
@@ -879,11 +922,17 @@ def add_vcs_arguments(parser, project, name):
879922 "--%s-sparse-profile" % project ,
880923 help = "Path to sparse profile for %s checkout" % name ,
881924 )
925+ parser .add_argument (
926+ "--%s-efficient-clone" % project ,
927+ action = "store_true" ,
928+ help = "Use efficient cloning strategies (blobless, shallow, no-checkout) for %s" % name ,
929+ )
882930
883931
884932def collect_vcs_options (args , project , name ):
885933 checkout = getattr (args , "%s_checkout" % project )
886934 sparse_profile = getattr (args , "%s_sparse_profile" % project )
935+ efficient_clone = getattr (args , "%s_efficient_clone" % project )
887936
888937 env_prefix = project .upper ()
889938
@@ -896,6 +945,7 @@ def collect_vcs_options(args, project, name):
896945 ref = os .environ .get ("%s_HEAD_REF" % env_prefix )
897946 pip_requirements = os .environ .get ("%s_PIP_REQUIREMENTS" % env_prefix )
898947 private_key_secret = os .environ .get ("%s_SSH_SECRET_NAME" % env_prefix )
948+ sparse_dirs = os .environ .get ("%s_SPARSE_DIRS" % env_prefix )
899949
900950 store_path = os .environ .get ("HG_STORE_PATH" )
901951
@@ -930,6 +980,8 @@ def collect_vcs_options(args, project, name):
930980 "repo-type" : repo_type ,
931981 "ssh-secret-name" : private_key_secret ,
932982 "pip-requirements" : pip_requirements ,
983+ "efficient-clone" : efficient_clone ,
984+ "sparse-dirs" : sparse_dirs ,
933985 }
934986
935987
@@ -978,6 +1030,8 @@ def vcs_checkout_from_args(options):
9781030 revision ,
9791031 ssh_key_file ,
9801032 ssh_known_hosts_file ,
1033+ options .get ("efficient-clone" , False ),
1034+ options .get ("sparse-dirs" ),
9811035 )
9821036 elif options ["repo-type" ] == "hg" :
9831037 if not revision and not ref :
0 commit comments