From e3912be93baf0c4bf0b801ee43da11a868d73e89 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 29 Jun 2017 14:33:38 -0400 Subject: [PATCH 01/24] dir: refactor add_excludes() Refactor add_excludes() to separate the reading of the exclude file into a buffer and the parsing of the buffer into exclude_list items. Add add_excludes_from_blob_to_list() to allow an exclude file to be specified with an OID. Signed-off-by: Jeff Hostetler --- dir.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- dir.h | 3 +++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 1d17b800cf374d..d848f2bfa29e48 100644 --- a/dir.c +++ b/dir.c @@ -739,6 +739,10 @@ static void invalidate_directory(struct untracked_cache *uc, dir->dirs[i]->recurse = 0; } +static int add_excludes_from_buffer(char *buf, size_t size, + const char *base, int baselen, + struct exclude_list *el); + /* * Given a file with name "fname", read it (either from disk, or from * an index if 'istate' is non-null), parse it and store the @@ -754,9 +758,9 @@ static int add_excludes(const char *fname, const char *base, int baselen, struct sha1_stat *sha1_stat) { struct stat st; - int fd, i, lineno = 1; + int fd; size_t size = 0; - char *buf, *entry; + char *buf; fd = open(fname, O_RDONLY); if (fd < 0 || fstat(fd, &st) < 0) { @@ -813,6 +817,17 @@ static int add_excludes(const char *fname, const char *base, int baselen, } } + add_excludes_from_buffer(buf, size, base, baselen, el); + return 0; +} + +static int add_excludes_from_buffer(char *buf, size_t size, + const char *base, int baselen, + struct exclude_list *el) +{ + int i, lineno = 1; + char *entry; + el->filebuf = buf; if (skip_utf8_bom(&buf, size)) @@ -841,6 +856,38 @@ int add_excludes_from_file_to_list(const char *fname, const char *base, return add_excludes(fname, base, baselen, el, istate, NULL); } +int add_excludes_from_blob_to_list( + struct object_id *oid, + const char *base, int baselen, + struct exclude_list *el) +{ + char 
*buf; + unsigned long size; + enum object_type type; + + buf = read_sha1_file(oid->hash, &type, &size); + if (!buf) + return -1; + + if (type != OBJ_BLOB) { + free(buf); + return -1; + } + + if (size == 0) { + free(buf); + return 0; + } + + if (buf[size - 1] != '\n') { + buf = xrealloc(buf, st_add(size, 1)); + buf[size++] = '\n'; + } + + add_excludes_from_buffer(buf, size, base, baselen, el); + return 0; +} + struct exclude_list *add_exclude_list(struct dir_struct *dir, int group_type, const char *src) { diff --git a/dir.h b/dir.h index e3717055d19336..1bcf39123ad7fd 100644 --- a/dir.h +++ b/dir.h @@ -256,6 +256,9 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir, extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, struct exclude_list *el, struct index_state *istate); extern void add_excludes_from_file(struct dir_struct *, const char *fname); +extern int add_excludes_from_blob_to_list(struct object_id *oid, + const char *base, int baselen, + struct exclude_list *el); extern void parse_exclude_pattern(const char **string, int *patternlen, unsigned *flags, int *nowildcardlen); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *el, int srcpos); From 3d1ea35faa97f7c31916e67ef3fcf5ab3c764fbd Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 16 Oct 2017 18:48:09 +0000 Subject: [PATCH 02/24] list-objects-filter-map: extend oidmap Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-map.c | 113 ++++++++++++++++++++++++++++++++++++++ list-objects-filter-map.h | 50 +++++++++++++++++ 3 files changed, 164 insertions(+) create mode 100644 list-objects-filter-map.c create mode 100644 list-objects-filter-map.h diff --git a/Makefile b/Makefile index cd75985991f453..e59f12da0b3af6 100644 --- a/Makefile +++ b/Makefile @@ -807,6 +807,7 @@ LIB_OBJS += levenshtein.o LIB_OBJS += line-log.o LIB_OBJS += line-range.o LIB_OBJS += list-objects.o +LIB_OBJS += 
list-objects-filter-map.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o diff --git a/list-objects-filter-map.c b/list-objects-filter-map.c new file mode 100644 index 00000000000000..3a9335a316066b --- /dev/null +++ b/list-objects-filter-map.c @@ -0,0 +1,113 @@ +#include "cache.h" +#include "list-objects-filter-map.h" + +void list_objects_filter_map_init( + struct list_objects_filter_map *map, size_t initial_size) +{ + oidmap_init(&map->map, initial_size); +} + +struct list_objects_filter_map_entry *list_objects_filter_map_get( + const struct list_objects_filter_map *map, + const struct object_id *oid) +{ + struct list_objects_filter_map_entry *e = oidmap_get(&map->map, oid); + + return e; +} + +int list_objects_filter_map_contains(const struct list_objects_filter_map *map, + const struct object_id *oid) +{ + return !!list_objects_filter_map_get(map, oid); +} + +int list_objects_filter_map_insert(struct list_objects_filter_map *map, + const struct object_id *oid, + const char *pathname, enum object_type type) +{ + struct list_objects_filter_map_entry *e; + void *old; + + if (list_objects_filter_map_contains(map, oid)) + return 1; + + e = xcalloc(1, sizeof(*e)); + oidcpy(&e->entry.oid, oid); + if (pathname && *pathname) + e->pathname = strdup(pathname); + e->type = type; + + old = oidmap_put(&map->map, e); + assert(!old); /* since we already confirmed !contained */ + + return 0; +} + +static inline void lofme_free(struct list_objects_filter_map_entry *e) +{ + if (!e) + return; + if (e->pathname) + free(e->pathname); + free(e); +} + +void list_objects_filter_map_remove(struct list_objects_filter_map *map, + const struct object_id *oid) +{ + struct list_objects_filter_map_entry *e; + + e = oidmap_remove(&map->map, oid); + lofme_free(e); +} + +void list_objects_filter_map_clear(struct list_objects_filter_map *map) +{ + struct hashmap_iter iter; + struct list_objects_filter_map_entry *e; + + hashmap_iter_init(&map->map.map, &iter); + while ((e = 
hashmap_iter_next(&iter))) + lofme_free(e); + + oidmap_free(&map->map, 0); +} + +static int my_cmp(const void *a, const void *b) +{ + const struct oidmap_entry *ea, *eb; + + ea = *(const struct oidmap_entry **)a; + eb = *(const struct oidmap_entry **)b; + + return oidcmp(&ea->oid, &eb->oid); +} + +void list_objects_filter_map_foreach(struct list_objects_filter_map *map, + list_objects_filter_map_foreach_cb cb, + void *cb_data) +{ + struct hashmap_iter iter; + struct list_objects_filter_map_entry **array; + struct list_objects_filter_map_entry *e; + int k, nr; + + nr = hashmap_get_size(&map->map.map); + if (!nr) + return; + + array = xcalloc(nr, sizeof(*e)); + + k = 0; + hashmap_iter_init(&map->map.map, &iter); + while ((e = hashmap_iter_next(&iter))) + array[k++] = e; + + QSORT(array, nr, my_cmp); + + for (k = 0; k < nr; k++) + cb(k, nr, array[k], cb_data); + + free(array); +} diff --git a/list-objects-filter-map.h b/list-objects-filter-map.h new file mode 100644 index 00000000000000..080c0de41737b3 --- /dev/null +++ b/list-objects-filter-map.h @@ -0,0 +1,50 @@ +#ifndef LIST_OBJECTS_FILTER_MAP_H +#define LIST_OBJECTS_FILTER_MAP_H + +#include "oidmap.h" + +struct list_objects_filter_map { + struct oidmap map; +}; + +#define LIST_OBJECTS_FILTER_MAP_INIT { { NULL } } + +struct list_objects_filter_map_entry { + struct oidmap_entry entry; /* must be first */ + + char *pathname; + enum object_type type; +}; + +extern void list_objects_filter_map_init( + struct list_objects_filter_map *map, size_t initial_size); + +extern struct list_objects_filter_map_entry *list_objects_filter_map_get( + const struct list_objects_filter_map *map, + const struct object_id *oid); + +extern int list_objects_filter_map_contains( + const struct list_objects_filter_map *map, + const struct object_id *oid); + +extern int list_objects_filter_map_insert( + struct list_objects_filter_map *map, + const struct object_id *oid, + const char *pathname, enum object_type type); + +extern void 
list_objects_filter_map_remove( + struct list_objects_filter_map *map, + const struct object_id *oid); + +extern void list_objects_filter_map_clear(struct list_objects_filter_map *map); + +typedef void (*list_objects_filter_map_foreach_cb)( + int i, int i_limit, + struct list_objects_filter_map_entry *e, void *cb_data); + +extern void list_objects_filter_map_foreach( + struct list_objects_filter_map *map, + list_objects_filter_map_foreach_cb cb, + void *cb_data); + +#endif /* LIST_OBJECTS_FILTER_MAP_H */ From 8cc9b26713dfd1ef355f4e0368b0fa0ae26252ee Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 30 Jun 2017 13:20:48 -0400 Subject: [PATCH 03/24] list-objects: filter objects in traverse_commit_list Create traverse_commit_list_filtered() and add filtering interface to allow certain objects to be omitted (not shown) during a traversal. Update traverse_commit_list() to be a wrapper for the above. Filtering will be used in a future commit by rev-list and pack-objects for narrow/partial clone/fetch to omit certain blobs from the output. traverse_bitmap_commit_list() does not work with filtering. If a packfile bitmap is present, it will not be used. 
Signed-off-by: Jeff Hostetler --- list-objects.c | 66 ++++++++++++++++++++++++++++++++++++++------------ list-objects.h | 32 +++++++++++++++++++++++- 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/list-objects.c b/list-objects.c index b3931fa434dc99..3e86008b1fb55c 100644 --- a/list-objects.c +++ b/list-objects.c @@ -13,10 +13,13 @@ static void process_blob(struct rev_info *revs, show_object_fn show, struct strbuf *path, const char *name, - void *cb_data) + void *cb_data, + filter_object_fn filter, + void *filter_data) { struct object *obj = &blob->object; size_t pathlen; + list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_SHOW; if (!revs->blob_objects) return; @@ -24,11 +27,15 @@ static void process_blob(struct rev_info *revs, die("bad blob object"); if (obj->flags & (UNINTERESTING | SEEN)) return; - obj->flags |= SEEN; pathlen = path->len; strbuf_addstr(path, name); - show(obj, path->buf, cb_data); + if (filter) + r = filter(LOFT_BLOB, obj, path->buf, &path->buf[pathlen], filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_SHOW) + show(obj, path->buf, cb_data); strbuf_setlen(path, pathlen); } @@ -69,7 +76,9 @@ static void process_tree(struct rev_info *revs, show_object_fn show, struct strbuf *base, const char *name, - void *cb_data) + void *cb_data, + filter_object_fn filter, + void *filter_data) { struct object *obj = &tree->object; struct tree_desc desc; @@ -77,6 +86,7 @@ static void process_tree(struct rev_info *revs, enum interesting match = revs->diffopt.pathspec.nr == 0 ? 
all_entries_interesting: entry_not_interesting; int baselen = base->len; + list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_SHOW; if (!revs->tree_objects) return; @@ -90,9 +100,13 @@ static void process_tree(struct rev_info *revs, die("bad tree object %s", oid_to_hex(&obj->oid)); } - obj->flags |= SEEN; strbuf_addstr(base, name); - show(obj, base->buf, cb_data); + if (filter) + r = filter(LOFT_BEGIN_TREE, obj, base->buf, &base->buf[baselen], filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_SHOW) + show(obj, base->buf, cb_data); if (base->len) strbuf_addch(base, '/'); @@ -112,7 +126,7 @@ static void process_tree(struct rev_info *revs, process_tree(revs, lookup_tree(entry.oid), show, base, entry.path, - cb_data); + cb_data, filter, filter_data); else if (S_ISGITLINK(entry.mode)) process_gitlink(revs, entry.oid->hash, show, base, entry.path, @@ -121,8 +135,17 @@ static void process_tree(struct rev_info *revs, process_blob(revs, lookup_blob(entry.oid), show, base, entry.path, - cb_data); + cb_data, filter, filter_data); } + + if (filter) { + r = filter(LOFT_END_TREE, obj, base->buf, &base->buf[baselen], filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_SHOW) + show(obj, base->buf, cb_data); + } + strbuf_setlen(base, baselen); free_tree_buffer(tree); } @@ -183,10 +206,10 @@ static void add_pending_tree(struct rev_info *revs, struct tree *tree) add_pending_object(revs, &tree->object, ""); } -void traverse_commit_list(struct rev_info *revs, - show_commit_fn show_commit, - show_object_fn show_object, - void *data) +void traverse_commit_list_worker( + struct rev_info *revs, + show_commit_fn show_commit, show_object_fn show_object, void *show_data, + filter_object_fn filter, void *filter_data) { int i; struct commit *commit; @@ -200,7 +223,7 @@ void traverse_commit_list(struct rev_info *revs, */ if (commit->tree) add_pending_tree(revs, commit->tree); - show_commit(commit, data); + show_commit(commit, show_data); } 
for (i = 0; i < revs->pending.nr; i++) { struct object_array_entry *pending = revs->pending.objects + i; @@ -211,19 +234,19 @@ void traverse_commit_list(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - show_object(obj, name, data); + show_object(obj, name, show_data); continue; } if (!path) path = ""; if (obj->type == OBJ_TREE) { process_tree(revs, (struct tree *)obj, show_object, - &base, path, data); + &base, path, show_data, filter, filter_data); continue; } if (obj->type == OBJ_BLOB) { process_blob(revs, (struct blob *)obj, show_object, - &base, path, data); + &base, path, show_data, filter, filter_data); continue; } die("unknown pending object %s (%s)", @@ -232,3 +255,14 @@ void traverse_commit_list(struct rev_info *revs, object_array_clear(&revs->pending); strbuf_release(&base); } + +void traverse_commit_list(struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + void *show_data) +{ + traverse_commit_list_worker( + revs, + show_commit, show_object, show_data, + NULL, NULL); +} diff --git a/list-objects.h b/list-objects.h index 0cebf8585cb179..43a06fbb64da22 100644 --- a/list-objects.h +++ b/list-objects.h @@ -8,4 +8,34 @@ void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, voi typedef void (*show_edge_fn)(struct commit *); void mark_edges_uninteresting(struct rev_info *, show_edge_fn); -#endif +enum list_objects_filter_result { + LOFR_ZERO = 0, + LOFR_MARK_SEEN = 1<<0, + LOFR_SHOW = 1<<1, +}; + +/* See object.h and revision.h */ +#define FILTER_REVISIT (1<<25) + +enum list_objects_filter_type { + LOFT_BEGIN_TREE, + LOFT_END_TREE, + LOFT_BLOB +}; + +typedef enum list_objects_filter_result list_objects_filter_result; +typedef enum list_objects_filter_type list_objects_filter_type; + +typedef list_objects_filter_result (*filter_object_fn)( + list_objects_filter_type filter_type, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data); + 
+void traverse_commit_list_worker( + struct rev_info *, + show_commit_fn, show_object_fn, void *show_data, + filter_object_fn filter, void *filter_data); + +#endif /* LIST_OBJECTS_H */ From e429fa5ee7980e0f1435499afa83cae612eae455 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 1 Sep 2017 17:19:51 +0000 Subject: [PATCH 04/24] list-objects-filter-blobs-none: add filter to omit all blobs Create a simple filter for traverse_commit_list_worker() to omit all blobs from the result. This filter will be used in a future commit by rev-list and pack-objects to create a "commits and trees" result. This is intended for partial clone and fetch support. Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-blobs-none.c | 83 ++++++++++++++++++++++++++++++++ list-objects-filter-blobs-none.h | 18 +++++++ 3 files changed, 102 insertions(+) create mode 100644 list-objects-filter-blobs-none.c create mode 100644 list-objects-filter-blobs-none.h diff --git a/Makefile b/Makefile index e59f12da0b3af6..7e9d1f4fcee073 100644 --- a/Makefile +++ b/Makefile @@ -807,6 +807,7 @@ LIB_OBJS += levenshtein.o LIB_OBJS += line-log.o LIB_OBJS += line-range.o LIB_OBJS += list-objects.o +LIB_OBJS += list-objects-filter-blobs-none.o LIB_OBJS += list-objects-filter-map.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o diff --git a/list-objects-filter-blobs-none.c b/list-objects-filter-blobs-none.c new file mode 100644 index 00000000000000..38c4b25c3d3262 --- /dev/null +++ b/list-objects-filter-blobs-none.c @@ -0,0 +1,83 @@ +#include "cache.h" +#include "dir.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "diff.h" +#include "tree-walk.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter-blobs-none.h" + +#define DEFAULT_MAP_SIZE (16*1024) + +/* + * A filter for list-objects to omit ALL blobs from the traversal. + * And to OPTIONALLY collect a list of the omitted OIDs. 
+ */ +struct filter_blobs_none_data { + struct list_objects_filter_map *omits; +}; + +static list_objects_filter_result filter_blobs_none( + list_objects_filter_type filter_type, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_blobs_none_data *filter_data = filter_data_; + + switch (filter_type) { + default: + die("unkown filter_type"); + return LOFR_ZERO; + + case LOFT_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + /* always include all tree objects */ + return LOFR_MARK_SEEN | LOFR_SHOW; + + case LOFT_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + case LOFT_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + if (filter_data->omits) + list_objects_filter_map_insert( + filter_data->omits, &obj->oid, pathname, + obj->type); + + return LOFR_MARK_SEEN; /* but not LOFR_SHOW (hard omit) */ + } +} + +void traverse_commit_list__blobs_none( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data) +{ + struct filter_blobs_none_data d; + + memset(&d, 0, sizeof(d)); + if (print_omitted_object) { + d.omits = xcalloc(1, sizeof(*d.omits)); + list_objects_filter_map_init(d.omits, DEFAULT_MAP_SIZE); + } + + traverse_commit_list_worker(revs, show_commit, show_object, ctx_data, + filter_blobs_none, &d); + + if (print_omitted_object) { + list_objects_filter_map_foreach(d.omits, + print_omitted_object, + ctx_data); + list_objects_filter_map_clear(d.omits); + } +} diff --git a/list-objects-filter-blobs-none.h b/list-objects-filter-blobs-none.h new file mode 100644 index 00000000000000..363c9de61696c3 --- /dev/null +++ b/list-objects-filter-blobs-none.h @@ -0,0 +1,18 @@ +#ifndef LIST_OBJECTS_FILTER_BLOBS_NONE_H +#define LIST_OBJECTS_FILTER_BLOBS_NONE_H + +#include "list-objects-filter-map.h" + +/* + * A filter for list-objects to omit ALL blobs + * from the traversal. 
+ */ +void traverse_commit_list__blobs_none( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data); + +#endif /* LIST_OBJECTS_FILTER_BLOBS_NONE_H */ + From 5e9c75ec65490cda36c9d5a529166c6241bd7499 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 1 Sep 2017 17:24:19 +0000 Subject: [PATCH 05/24] list-objects-filter-blobs-limit: add large blob filtering Create a filter for traverse_commit_list_worker() to omit blobs larger than a requested size from the result, but always include ".git*" special files. Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-blobs-limit.c | 147 ++++++++++++++++++++++++++++++ list-objects-filter-blobs-limit.h | 18 ++++ 3 files changed, 166 insertions(+) create mode 100644 list-objects-filter-blobs-limit.c create mode 100644 list-objects-filter-blobs-limit.h diff --git a/Makefile b/Makefile index 7e9d1f4fcee073..0fdeabbb8e4729 100644 --- a/Makefile +++ b/Makefile @@ -807,6 +807,7 @@ LIB_OBJS += levenshtein.o LIB_OBJS += line-log.o LIB_OBJS += line-range.o LIB_OBJS += list-objects.o +LIB_OBJS += list-objects-filter-blobs-limit.o LIB_OBJS += list-objects-filter-blobs-none.o LIB_OBJS += list-objects-filter-map.o LIB_OBJS += ll-merge.o diff --git a/list-objects-filter-blobs-limit.c b/list-objects-filter-blobs-limit.c new file mode 100644 index 00000000000000..2c3f8f1c000eba --- /dev/null +++ b/list-objects-filter-blobs-limit.c @@ -0,0 +1,147 @@ +#include "cache.h" +#include "dir.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "diff.h" +#include "tree-walk.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter-blobs-limit.h" + +#define DEFAULT_MAP_SIZE (16*1024) + +/* + * A filter for list-objects to omit large blobs, + * but always include ".git*" special files. + * And to OPTIONALLY collect a list of the omitted OIDs. 
+ */ +struct filter_blobs_limit_data { + struct list_objects_filter_map *omits; + unsigned long max_bytes; +}; + +static list_objects_filter_result filter_blobs_limit( + list_objects_filter_type filter_type, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_blobs_limit_data *filter_data = filter_data_; + unsigned long object_length; + enum object_type t; + int is_special_filename; + + switch (filter_type) { + default: + die("unkown filter_type"); + return LOFR_ZERO; + + case LOFT_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + /* always include all tree objects */ + return LOFR_MARK_SEEN | LOFR_SHOW; + + case LOFT_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + case LOFT_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + is_special_filename = ((strncmp(filename, ".git", 4) == 0) && + filename[4]); + + /* + * If we are keeping a list of the omitted objects + * for the caller *AND* we previously "provisionally" + * omitted this object (because of size) *AND* it now + * has a special filename, make it not-omitted. + * Otherwise, continue to provisionally omit it. + */ + if (filter_data->omits && + list_objects_filter_map_contains(filter_data->omits, + &obj->oid)) { + if (!is_special_filename) + return LOFR_ZERO; + + list_objects_filter_map_remove(filter_data->omits, + &obj->oid); + return LOFR_MARK_SEEN | LOFR_SHOW; + } + + /* + * If filename matches ".git*", always include it (regardless + * of size). (This may include blobs that we do not have + * locally.) + */ + if (is_special_filename) + return LOFR_MARK_SEEN | LOFR_SHOW; + + t = sha1_object_info(obj->oid.hash, &object_length); + if (t != OBJ_BLOB) { /* probably OBJ_NONE */ + /* + * We DO NOT have the blob locally, so we cannot + * apply the size filter criteria. Be conservative + * and force show it (and let the caller deal with + * the ambiguity). 
(This matches the behavior above + * when the special filename matches.) + */ + return LOFR_MARK_SEEN | LOFR_SHOW; + } + + if (object_length < filter_data->max_bytes) + return LOFR_MARK_SEEN | LOFR_SHOW; + + /* + * Provisionally omit it. We've already established + * that this blob is too big and doesn't have a special + * filename, so we *WANT* to omit it. However, there + * may be a special file elsewhere in the tree that + * references this same blob, so we cannot reject it + * just yet. Leave the LOFR_ bits unset so that *IF* + * the blob appears again in the traversal, we will + * be asked again. + * + * If we are keeping a list of the omitted objects, + * provisionally add it to the list. + */ + + if (filter_data->omits) + list_objects_filter_map_insert(filter_data->omits, + &obj->oid, pathname, + obj->type); + + return LOFR_ZERO; + } +} + +void traverse_commit_list__blobs_limit( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data, + unsigned long large_byte_limit) +{ + struct filter_blobs_limit_data d; + + memset(&d, 0, sizeof(d)); + if (print_omitted_object) { + d.omits = xcalloc(1, sizeof(*d.omits)); + list_objects_filter_map_init(d.omits, DEFAULT_MAP_SIZE); + } + d.max_bytes = large_byte_limit; + + traverse_commit_list_worker(revs, show_commit, show_object, ctx_data, + filter_blobs_limit, &d); + + if (print_omitted_object) { + list_objects_filter_map_foreach(d.omits, print_omitted_object, + ctx_data); + list_objects_filter_map_clear(d.omits); + } +} diff --git a/list-objects-filter-blobs-limit.h b/list-objects-filter-blobs-limit.h new file mode 100644 index 00000000000000..ea0508881d7ca0 --- /dev/null +++ b/list-objects-filter-blobs-limit.h @@ -0,0 +1,18 @@ +#ifndef LIST_OBJECTS_FILTER_BLOBS_LIMIT_H +#define LIST_OBJECTS_FILTER_BLOBS_LIMIT_H + +#include "list-objects-filter-map.h" + +/* + * A filter for list-objects to omit large blobs, + * but 
always include ".git*" special files. + */ +void traverse_commit_list__blobs_limit( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data, + unsigned long large_byte_limit); + +#endif /* LIST_OBJECTS_FILTER_BLOBS_LIMIT_H */ From c4cc54eb97544a0c6facd9176191bb6aaf2abc58 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 1 Sep 2017 17:27:44 +0000 Subject: [PATCH 06/24] list-objects-filter-sparse: add sparse filter Create a filter for traverse_commit_list_worker() to only include the blobs that would be referenced by a sparse-checkout using the given specification. Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-sparse.c | 239 +++++++++++++++++++++++++++++++++++ list-objects-filter-sparse.h | 30 +++++ 3 files changed, 270 insertions(+) create mode 100644 list-objects-filter-sparse.c create mode 100644 list-objects-filter-sparse.h diff --git a/Makefile b/Makefile index 0fdeabbb8e4729..fc82664e5b3eb2 100644 --- a/Makefile +++ b/Makefile @@ -810,6 +810,7 @@ LIB_OBJS += list-objects.o LIB_OBJS += list-objects-filter-blobs-limit.o LIB_OBJS += list-objects-filter-blobs-none.o LIB_OBJS += list-objects-filter-map.o +LIB_OBJS += list-objects-filter-sparse.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o diff --git a/list-objects-filter-sparse.c b/list-objects-filter-sparse.c new file mode 100644 index 00000000000000..c773d940ed8ef6 --- /dev/null +++ b/list-objects-filter-sparse.c @@ -0,0 +1,239 @@ +#include "cache.h" +#include "dir.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "diff.h" +#include "tree-walk.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter-sparse.h" + +#define DEFAULT_MAP_SIZE (16*1024) + +/* + * A filter driven by a sparse-checkout specification to only + * include blobs that a sparse checkout would populate. 
+ * + * The sparse-checkout spec can be loaded from a blob with the + * given OID or from a local pathname. We allow an OID because + * the repo may be bare or we may be doing the filtering on the + * server. + */ +struct frame { + int defval; + int child_prov_omit : 1; +}; + +struct filter_use_sparse_data { + struct list_objects_filter_map *omits; + struct exclude_list el; + + size_t nr, alloc; + struct frame *array_frame; +}; + +static list_objects_filter_result filter_use_sparse( + list_objects_filter_type filter_type, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_use_sparse_data *filter_data = filter_data_; + struct list_objects_filter_map_entry *entry_prev = NULL; + int val, dtype; + struct frame *frame; + + switch (filter_type) { + default: + die("unkown filter_type"); + return LOFR_ZERO; + + case LOFT_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + dtype = DT_DIR; + val = is_excluded_from_list(pathname, strlen(pathname), + filename, &dtype, &filter_data->el, + &the_index); + if (val < 0) + val = filter_data->array_frame[filter_data->nr].defval; + + ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1, + filter_data->alloc); + filter_data->nr++; + filter_data->array_frame[filter_data->nr].defval = val; + filter_data->array_frame[filter_data->nr].child_prov_omit = 0; + + /* + * A directory with this tree OID may appear in multiple + * places in the tree. (Think of a directory move, with + * no other changes.) And with a different pathname, the + * is_excluded...() results for this directory and items + * contained within it may be different. So we cannot + * mark it SEEN (yet), since that will prevent process_tree() + * from revisiting this tree object with other pathnames. + * + * Only SHOW the tree object the first time we visit this + * tree object. + * + * We always show all tree objects. A future optimization + * may want to attempt to narrow this. 
+ */ + if (obj->flags & FILTER_REVISIT) + return LOFR_ZERO; + obj->flags |= FILTER_REVISIT; + return LOFR_SHOW; + + case LOFT_END_TREE: + assert(obj->type == OBJ_TREE); + assert(filter_data->nr > 0); + + frame = &filter_data->array_frame[filter_data->nr]; + filter_data->nr--; + + /* + * Tell our parent directory if any of our children were + * provisionally omitted. + */ + filter_data->array_frame[filter_data->nr].child_prov_omit |= + frame->child_prov_omit; + + /* + * If there are NO provisionally omitted child objects (ALL child + * objects in this folder were INCLUDED), then we can mark the + * folder as SEEN (so we will not have to revisit it again). + */ + if (!frame->child_prov_omit) + return LOFR_MARK_SEEN; + return LOFR_ZERO; + + case LOFT_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + frame = &filter_data->array_frame[filter_data->nr]; + + /* + * If we are keeping a list of the omitted objects + * for the caller *AND* we previously provisionally + * omitted this object (because the THEN pathname + * is excluded) *AND* it has the same pathname, we + * can avoid duplicating the is_excluded lookup + * costs and continue provisionally omitting it. + */ + if (filter_data->omits) { + entry_prev = list_objects_filter_map_get( + filter_data->omits, &obj->oid); + if (entry_prev && + !strcmp(pathname, entry_prev->pathname)) { + frame->child_prov_omit = 1; + return LOFR_ZERO; + } + } + + dtype = DT_REG; + val = is_excluded_from_list(pathname, strlen(pathname), + filename, &dtype, &filter_data->el, + &the_index); + if (val < 0) + val = frame->defval; + if (val > 0) { + if (entry_prev) + list_objects_filter_map_remove( + filter_data->omits, &obj->oid); + return LOFR_MARK_SEEN | LOFR_SHOW; + } + + /* + * Provisionally omit it. We've already established that + * this pathname is not in the sparse-checkout specification + * with the CURRENT pathname, so we *WANT* to omit this blob. 
+ * + * However, a pathname elsewhere in the tree may also + * reference this same blob, so we cannot reject it yet. + * Leave the LOFR_ bits unset so that if the blob appears + * again in the traversal, we will be asked again. + * + * The pathname that we associate with this omit is just + * the first one we saw for this blob. Other instances of + * this blob may have other pathnames and that is fine. + * We just use it for perf to do the entry_prev lookup + * above (because most of the time, the blob will be in + * the same place as we walk the commits). + */ + if (filter_data->omits) + list_objects_filter_map_insert(filter_data->omits, + &obj->oid, pathname, + obj->type); + + frame->child_prov_omit = 1; + return LOFR_ZERO; + } +} + +static void do_sparse( + struct filter_use_sparse_data *d, + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data) +{ + ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc); + d->array_frame[d->nr].defval = 0; /* default to include */ + d->array_frame[d->nr].child_prov_omit = 0; + + traverse_commit_list_worker(revs, show_commit, show_object, ctx_data, + filter_use_sparse, d); + + if (print_omitted_object) { + list_objects_filter_map_foreach(d->omits, print_omitted_object, ctx_data); + list_objects_filter_map_clear(d->omits); + } +} + +void traverse_commit_list__sparse_oid( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data, + struct object_id *oid) +{ + struct filter_use_sparse_data d; + + memset(&d, 0, sizeof(d)); + if (print_omitted_object) { + d.omits = xcalloc(1, sizeof(*d.omits)); + list_objects_filter_map_init(d.omits, DEFAULT_MAP_SIZE); + } + if (add_excludes_from_blob_to_list(oid, NULL, 0, &d.el) < 0) + die("could not load filter specification"); + + do_sparse(&d, revs, show_commit, show_object, print_omitted_object, + 
ctx_data); +} + +void traverse_commit_list__sparse_path( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data, + const char *path) +{ + struct filter_use_sparse_data d; + + memset(&d, 0, sizeof(d)); + if (print_omitted_object) { + d.omits = xcalloc(1, sizeof(*d.omits)); + list_objects_filter_map_init(d.omits, DEFAULT_MAP_SIZE); + } + if (add_excludes_from_file_to_list(path, NULL, 0, &d.el, NULL) < 0) + die("could not load filter specification"); + + do_sparse(&d, revs, show_commit, show_object, print_omitted_object, + ctx_data); +} diff --git a/list-objects-filter-sparse.h b/list-objects-filter-sparse.h new file mode 100644 index 00000000000000..6c715bf6e2197d --- /dev/null +++ b/list-objects-filter-sparse.h @@ -0,0 +1,30 @@ +#ifndef LIST_OBJECTS_FILTERS_SPARSE_H +#define LIST_OBJECTS_FILTERS_SPARSE_H + +#include "list-objects-filter-map.h" + +/* + * A filter driven by a sparse-checkout specification to only + * include blobs that a sparse checkout would populate. + * + * The sparse-checkout spec can be loaded from a blob with the + * given OID, a blob with a blob-ish path, or from a local pathname. + * We allow an OID because the repo may be bare or we may be doing + * the filtering on the server. 
+ */ +void traverse_commit_list__sparse_oid( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data, + struct object_id *oid); +void traverse_commit_list__sparse_path( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *ctx_data, + const char *path); + +#endif /* LIST_OBJECTS_FILTERS_SPARSE_H */ From 1042b204bd7d10705eb7b26408e07004675bc4d2 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 6 Jul 2017 16:52:36 -0400 Subject: [PATCH 07/24] list-objects-filter-options: common argument parsing Create common routines and defines for parsing list-objects-filter-related command line arguments and pack-protocol fields. Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-options.c | 101 ++++++++++++++++++++++++++++++++++ list-objects-filter-options.h | 50 +++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 list-objects-filter-options.c create mode 100644 list-objects-filter-options.h diff --git a/Makefile b/Makefile index fc82664e5b3eb2..b9ff0b44498b87 100644 --- a/Makefile +++ b/Makefile @@ -810,6 +810,7 @@ LIB_OBJS += list-objects.o LIB_OBJS += list-objects-filter-blobs-limit.o LIB_OBJS += list-objects-filter-blobs-none.o LIB_OBJS += list-objects-filter-map.o +LIB_OBJS += list-objects-filter-options.o LIB_OBJS += list-objects-filter-sparse.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c new file mode 100644 index 00000000000000..40f48ac275cabf --- /dev/null +++ b/list-objects-filter-options.c @@ -0,0 +1,101 @@ +#include "cache.h" +#include "commit.h" +#include "config.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter-options.h" + +/* + * Parse value of the argument to the "filter" keyword.
+ * On the command line this looks like: --filter=<arg> + * and in the pack protocol as: filter <arg> + * + * <arg> ::= blob:none + * blob:limit=<n>[kmg] + * sparse:oid=<oid-ish> + * sparse:path=<path> + */ +int parse_list_objects_filter(struct list_objects_filter_options *filter_options, + const char *arg) +{ + struct object_context oc; + struct object_id sparse_oid; + const char *v0; + const char *v1; + + if (filter_options->choice) + die(_("multiple object filter types cannot be combined")); + + /* + * TODO consider rejecting 'arg' if it contains any + * TODO injection characters (since we might send this + * TODO to a sub-command or to the server and we don't + * TODO want to deal with legacy quoting/escaping for + * TODO a new feature). + */ + + filter_options->raw_value = xstrdup(arg); + + if (skip_prefix(arg, "blob:", &v0) || skip_prefix(arg, "blobs:", &v0)) { + if (!strcmp(v0, "none")) { + filter_options->choice = LOFC_BLOB_NONE; + return 0; + } + + if (skip_prefix(v0, "limit=", &v1) && + git_parse_ulong(v1, &filter_options->blob_limit_value)) { + filter_options->choice = LOFC_BLOB_LIMIT; + return 0; + } + } + else if (skip_prefix(arg, "sparse:", &v0)) { + if (skip_prefix(v0, "oid=", &v1)) { + filter_options->choice = LOFC_SPARSE_OID; + if (!get_oid_with_context(v1, GET_OID_BLOB, + &sparse_oid, &oc)) { + /* + * We successfully converted the <oid-ish> + * into an actual OID. Rewrite the raw_value + * in canonical form with just the OID. + * (If we send this request to the server, we + * want an absolute expression rather than a + * local-ref-relative expression.) + */ + free((char *)filter_options->raw_value); + filter_options->raw_value = + xstrfmt("sparse:oid=%s", + oid_to_hex(&sparse_oid)); + filter_options->sparse_oid_value = + oiddup(&sparse_oid); + } else { + /* + * We could not turn the <oid-ish> into an + * OID. Leave the raw_value as is in case + * the server can parse it. (It may refer to + * a branch, commit, or blob we don't have.)
+ */ + } + return 0; + } + + if (skip_prefix(v0, "path=", &v1)) { + filter_options->choice = LOFC_SPARSE_PATH; + filter_options->sparse_path_value = xstrdup(v1); + return 0; + } + } + + die(_("invalid filter expression '%s'"), arg); + return 0; +} + +int opt_parse_list_objects_filter(const struct option *opt, + const char *arg, int unset) +{ + struct list_objects_filter_options *filter_options = opt->value; + + assert(arg); + assert(!unset); + + return parse_list_objects_filter(filter_options, arg); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h new file mode 100644 index 00000000000000..23bd68ee7aa699 --- /dev/null +++ b/list-objects-filter-options.h @@ -0,0 +1,50 @@ +#ifndef LIST_OBJECTS_FILTER_OPTIONS_H +#define LIST_OBJECTS_FILTER_OPTIONS_H + +#include "parse-options.h" + +/* + * Common declarations and utilities for filtering objects (such as omitting + * large blobs) in list_objects:traverse_commit_list() and git-rev-list. + */ + +enum list_objects_filter_choice { + LOFC_DISABLED = 0, + LOFC_BLOB_NONE, + LOFC_BLOB_LIMIT, + LOFC_SPARSE_OID, + LOFC_SPARSE_PATH, +}; + +struct list_objects_filter_options { + /* + * The raw argument value given on the command line or + * protocol request. (The part after the "--keyword=".) + */ + char *raw_value; + + /* + * Parsed values. Only 1 will be set depending on the flags below.
+ */ + struct object_id *sparse_oid_value; + char *sparse_path_value; + unsigned long blob_limit_value; + + enum list_objects_filter_choice choice; +}; + +/* Normalized command line arguments */ +#define CL_ARG__FILTER "filter" + +int parse_list_objects_filter(struct list_objects_filter_options *filter_options, + const char *arg); + +int opt_parse_list_objects_filter(const struct option *opt, + const char *arg, int unset); + +#define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \ + { OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \ + N_("object filtering"), PARSE_OPT_NONEG, \ + opt_parse_list_objects_filter } + +#endif /* LIST_OBJECTS_FILTER_OPTIONS_H */ From 714c0778649209b370d0bbd7ae6652de8fef7494 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 1 Sep 2017 18:20:36 +0000 Subject: [PATCH 08/24] list-objects: add traverse_commit_list_filtered method Add traverse_commit_list_filtered() wrapper around the various filter methods using common data in object_filter_options. Signed-off-by: Jeff Hostetler --- list-objects.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ list-objects.h | 11 +++++++++++ 2 files changed, 56 insertions(+) diff --git a/list-objects.c b/list-objects.c index 3e86008b1fb55c..4ce25939c528d7 100644 --- a/list-objects.c +++ b/list-objects.c @@ -7,6 +7,9 @@ #include "tree-walk.h" #include "revision.h" #include "list-objects.h" +#include "list-objects-filter-blobs-none.h" +#include "list-objects-filter-blobs-limit.h" +#include "list-objects-filter-sparse.h" static void process_blob(struct rev_info *revs, struct blob *blob, @@ -266,3 +269,45 @@ void traverse_commit_list(struct rev_info *revs, show_commit, show_object, show_data, NULL, NULL); } + +void traverse_commit_list_filtered( + struct list_objects_filter_options *filter_options, + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *show_data) +{ + switch (filter_options->choice) { + case 
LOFC_DISABLED: + traverse_commit_list(revs, show_commit, show_object, show_data); + return; + + case LOFC_BLOB_NONE: + traverse_commit_list__blobs_none( + revs, show_commit, show_object, print_omitted_object, + show_data); + return; + + case LOFC_BLOB_LIMIT: + traverse_commit_list__blobs_limit( + revs, show_commit, show_object, print_omitted_object, + show_data, filter_options->blob_limit_value); + return; + + case LOFC_SPARSE_OID: + traverse_commit_list__sparse_oid( + revs, show_commit, show_object, print_omitted_object, + show_data, filter_options->sparse_oid_value); + return; + + case LOFC_SPARSE_PATH: + traverse_commit_list__sparse_path( + revs, show_commit, show_object, print_omitted_object, + show_data, filter_options->sparse_path_value); + return; + + default: + die("unspecified list-objects filter"); + } +} diff --git a/list-objects.h b/list-objects.h index 43a06fbb64da22..d14b0e048e646b 100644 --- a/list-objects.h +++ b/list-objects.h @@ -1,6 +1,9 @@ #ifndef LIST_OBJECTS_H #define LIST_OBJECTS_H +#include "list-objects-filter-map.h" +#include "list-objects-filter-options.h" + typedef void (*show_commit_fn)(struct commit *, void *); typedef void (*show_object_fn)(struct object *, const char *, void *); void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *); @@ -38,4 +41,12 @@ void traverse_commit_list_worker( show_commit_fn, show_object_fn, void *show_data, filter_object_fn filter, void *filter_data); +void traverse_commit_list_filtered( + struct list_objects_filter_options *filter_options, + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + list_objects_filter_map_foreach_cb print_omitted_object, + void *show_data); + #endif /* LIST_OBJECTS_H */ From 9ffafbbef384aaf9afd1459c9efe253a900e2f66 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 17 Oct 2017 19:04:14 +0000 Subject: [PATCH 09/24] extension.partialclone: introduce partial clone extension Introduce the ability to have missing 
objects in a repo. This functionality is guarded by a new repository extension option `extensions.partialClone`. See the update to Documentation/technical/repository-version.txt in this patch for more information. This patch is part of a patch originally authored by: Jonathan Tan Signed-off-by: Jeff Hostetler --- .../technical/repository-version.txt | 22 +++++ Makefile | 1 + cache.h | 4 + config.h | 3 + environment.c | 2 + partial-clone-utils.c | 99 +++++++++++++++++++ partial-clone-utils.h | 34 +++++++ setup.c | 15 +++ 8 files changed, 180 insertions(+) create mode 100644 partial-clone-utils.c create mode 100644 partial-clone-utils.h diff --git a/Documentation/technical/repository-version.txt b/Documentation/technical/repository-version.txt index 00ad37986efdce..9d488dbbcade4d 100644 --- a/Documentation/technical/repository-version.txt +++ b/Documentation/technical/repository-version.txt @@ -86,3 +86,25 @@ for testing format-1 compatibility. When the config key `extensions.preciousObjects` is set to `true`, objects in the repository MUST NOT be deleted (e.g., by `git-prune` or `git repack -d`). + +`partialcloneremote` +~~~~~~~~~~~~~~~~~~~~ + +When the config key `extensions.partialcloneremote` is set, it indicates +that the repo was created with a partial clone (or later performed +a partial fetch) and that the remote may have omitted sending +certain unwanted objects. Such a remote is called a "promisor remote" +and it promises that all such omitted objects can be fetched from it +in the future. + +The value of this key is the name of the promisor remote. + +`partialclonefilter` +~~~~~~~~~~~~~~~~~~~~ + +When the config key `extensions.partialclonefilter` is set, it gives +the initial filter expression used to create the partial clone. +This value becomes the default filter expression for subsequent +fetches (called "partial fetches") from the promisor remote. This +value may also be set by the first explicit partial fetch following a +normal clone.
diff --git a/Makefile b/Makefile index b9ff0b44498b87..38632fb9c4b2e2 100644 --- a/Makefile +++ b/Makefile @@ -841,6 +841,7 @@ LIB_OBJS += pack-write.o LIB_OBJS += pager.o LIB_OBJS += parse-options.o LIB_OBJS += parse-options-cb.o +LIB_OBJS += partial-clone-utils.o LIB_OBJS += patch-delta.o LIB_OBJS += patch-ids.o LIB_OBJS += path.o diff --git a/cache.h b/cache.h index 6440e2bf21f580..4b785c030ec40c 100644 --- a/cache.h +++ b/cache.h @@ -860,12 +860,16 @@ extern int grafts_replace_parents; #define GIT_REPO_VERSION 0 #define GIT_REPO_VERSION_READ 1 extern int repository_format_precious_objects; +extern char *repository_format_partial_clone_remote; +extern char *repository_format_partial_clone_filter; struct repository_format { int version; int precious_objects; int is_bare; char *work_tree; + char *partial_clone_remote; /* value of extensions.partialcloneremote */ + char *partial_clone_filter; /* value of extensions.partialclonefilter */ struct string_list unknown_extensions; }; diff --git a/config.h b/config.h index a49d2644162250..90544ef46c39a2 100644 --- a/config.h +++ b/config.h @@ -34,6 +34,9 @@ struct config_options { const char *git_dir; }; +#define KEY_PARTIALCLONEREMOTE "partialcloneremote" +#define KEY_PARTIALCLONEFILTER "partialclonefilter" + typedef int (*config_fn_t)(const char *, const char *, void *); extern int git_default_config(const char *, const char *, void *); extern int git_config_from_file(config_fn_t fn, const char *, void *); diff --git a/environment.c b/environment.c index 8289c25b44d74a..2fcf9bb3d4d90e 100644 --- a/environment.c +++ b/environment.c @@ -27,6 +27,8 @@ int warn_ambiguous_refs = 1; int warn_on_object_refname_ambiguity = 1; int ref_paranoia = -1; int repository_format_precious_objects; +char *repository_format_partial_clone_remote; +char *repository_format_partial_clone_filter; const char *git_commit_encoding; const char *git_log_output_encoding; const char *apply_default_whitespace; diff --git a/partial-clone-utils.c 
b/partial-clone-utils.c new file mode 100644 index 00000000000000..8c925ae6208813 --- /dev/null +++ b/partial-clone-utils.c @@ -0,0 +1,101 @@ +#include "cache.h" +#include "config.h" +#include "partial-clone-utils.h" + +int is_partial_clone_registered(void) +{ + if (repository_format_partial_clone_remote || + repository_format_partial_clone_filter) + return 1; + + return 0; +} + +void partial_clone_utils_register( + const struct list_objects_filter_options *filter_options, + const char *remote, + const char *cmd_name) +{ + struct strbuf buf = STRBUF_INIT; + + if (is_partial_clone_registered()) { + /* + * The original partial-clone or a previous partial-fetch + * already registered the partial-clone settings. + * If we get here, we are in a subsequent partial-* command + * (with explicit filter args on the command line). + * + * For now, we restrict subsequent commands to one + * consistent with the original request. We may relax + * this later after we get more experience with the + * partial-clone feature. + * + * [] Restrict to same remote because our dynamic + * object loading only knows how to fetch objects + * from 1 remote. + */ + assert(filter_options && filter_options->choice); + assert(remote && *remote); + + if (strcmp(remote, repository_format_partial_clone_remote)) + die("%s --%s currently limited to remote '%s'", + cmd_name, CL_ARG__FILTER, + repository_format_partial_clone_remote); + + /* + * Treat the (possibly new) filter-spec as transient; + * use it for the current command, but do not overwrite + * the default. + */ + return; + } + + repository_format_partial_clone_remote = xstrdup(remote); + repository_format_partial_clone_filter = xstrdup(filter_options->raw_value); + + /* + * Force repo version > 0 to enable extensions namespace. + */ + git_config_set("core.repositoryformatversion", "1"); + + /* + * Use the "extensions" namespace in the config to record + * the name of the remote used in the partial clone.
+ * This will help us return to that server when we need + * to backfill missing objects. + * + * It is also used to indicate that there *MAY* be + * missing objects so that subsequent commands don't + * immediately die if they hit one. + * + * Also remember the initial filter settings used by + * clone as a default for future fetches. + */ + git_config_set("extensions." KEY_PARTIALCLONEREMOTE, + repository_format_partial_clone_remote); + git_config_set("extensions." KEY_PARTIALCLONEFILTER, + repository_format_partial_clone_filter); + + /* + * TODO Do we need to record both partial-clone + * parameters in the extensions namespace and in the + * section for the remote? + * + * Or should we just remember 1 in each, as in: + * "extension.partialcloneremote=" + * "remote..filter=" + * The issue is when can we set both of the + * repository_format_partial_clone_* globals + * during subsequent startups. + * See setup.c:check_repo_format(). + */ + strbuf_addf(&buf, "remote.%s.%s", remote, KEY_PARTIALCLONEREMOTE); + git_config_set(buf.buf, repository_format_partial_clone_remote); + + /* strbuf_addf() appends; clear the previous key before building the next one. */ + strbuf_reset(&buf); + strbuf_addf(&buf, "remote.%s.%s", remote, KEY_PARTIALCLONEFILTER); + git_config_set(buf.buf, repository_format_partial_clone_filter); + + strbuf_release(&buf); +} diff --git a/partial-clone-utils.h b/partial-clone-utils.h new file mode 100644 index 00000000000000..b52757084d1b81 --- /dev/null +++ b/partial-clone-utils.h @@ -0,0 +1,34 @@ +#ifndef PARTIAL_CLONE_UTILS_H +#define PARTIAL_CLONE_UTILS_H + +#include "list-objects-filter-options.h" + +/* + * Register that partial-clone was used to create the repo and + * update the config on disk. + * + * If nothing else, this indicates that the ODB may have missing + * objects and that various commands should handle that gracefully. + * + * Record the remote used for the clone so that we know where + * to get missing objects in the future.
+ * + * Also record the filter expression so that we know something + * about the missing objects (e.g., size-limit vs sparse). + * + * May also be used by a partial-fetch following a normal clone + * to turn on the above tracking. + */ +extern void partial_clone_utils_register( + const struct list_objects_filter_options *filter_options, + const char *remote, + const char *cmd_name); + +/* + * Return 1 if partial-clone was used to create the repo + * or a subsequent partial-fetch was used. This is an + * indicator that there may be missing objects. + */ +extern int is_partial_clone_registered(void); + +#endif /* PARTIAL_CLONE_UTILS_H */ diff --git a/setup.c b/setup.c index 03f51e056cd6e6..bc4133dd39f82c 100644 --- a/setup.c +++ b/setup.c @@ -420,6 +420,19 @@ static int check_repo_format(const char *var, const char *value, void *vdata) ; else if (!strcmp(ext, "preciousobjects")) data->precious_objects = git_config_bool(var, value); + + else if (!strcmp(ext, KEY_PARTIALCLONEREMOTE)) + if (!value) + return config_error_nonbool(var); + else + data->partial_clone_remote = xstrdup(value); + + else if (!strcmp(ext, KEY_PARTIALCLONEFILTER)) + if (!value) + return config_error_nonbool(var); + else + data->partial_clone_filter = xstrdup(value); + else string_list_append(&data->unknown_extensions, ext); } else if (strcmp(var, "core.bare") == 0) { @@ -463,6 +476,8 @@ static int check_repository_format_gently(const char *gitdir, int *nongit_ok) } repository_format_precious_objects = candidate.precious_objects; + repository_format_partial_clone_remote = candidate.partial_clone_remote; + repository_format_partial_clone_filter = candidate.partial_clone_filter; string_list_clear(&candidate.unknown_extensions, 0); if (!has_common) { if (candidate.is_bare != -1) { From db7c198b636ec88da1e2ac3d10eee701ca8022cf Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 30 Jun 2017 13:39:05 -0400 Subject: [PATCH 10/24] rev-list: add list-objects filtering support Teach rev-list to use the 
filtering provided by the traverse_commit_list_filtered() interface to omit unwanted objects from the result. This feature is only enabled when one of the "--objects*" options is used. When the "--filter-print-omitted" option is used, the omitted objects and their types are printed at the end. These are marked with a "~". This can be combined with "--quiet" to get a list of just the omitted objects. Signed-off-by: Jeff Hostetler --- Documentation/git-rev-list.txt | 5 +- Documentation/rev-list-options.txt | 30 +++++++++++ builtin/rev-list.c | 85 +++++++++++++++++++++++++++++- 3 files changed, 117 insertions(+), 3 deletions(-) diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt index ef22f1775b6348..6d2e60dab34fe3 100644 --- a/Documentation/git-rev-list.txt +++ b/Documentation/git-rev-list.txt @@ -47,7 +47,10 @@ SYNOPSIS [ --fixed-strings | -F ] [ --date=] [ [ --objects | --objects-edge | --objects-edge-aggressive ] - [ --unpacked ] ] + [ --unpacked ] + [ --filter=<filter-spec> ] ] + [ --filter-print-missing ] + [ --filter-print-omitted ] [ --pretty | --header ] [ --bisect ] [ --bisect-vars ] diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 7d860bfca1442e..88f88788b0cd35 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -706,6 +706,36 @@ ifdef::git-rev-list[] --unpacked:: Only useful with `--objects`; print the object IDs that are not in packs. + +--filter=<filter-spec>:: + Only useful with one of the `--objects*`; omits objects (usually + blobs) from the list of printed objects. The '<filter-spec>' + may be one of the following: ++ +The form '--filter=blob:none' omits all blobs. ++ +The form '--filter=blob:limit=<n>[kmg]' omits blobs larger than n bytes +or units. The value may be zero. Special files matching '.git*' are +always included, regardless of size.
++ +The form '--filter=sparse:oid=<oid-ish>' uses a sparse-checkout +specification contained in the object (or the object that the expression +evaluates to) to omit blobs not required by the corresponding sparse +checkout. ++ +The form '--filter=sparse:path=<path>' similarly uses a sparse-checkout +specification contained in <path>. + +--filter-print-missing:: + Prints a list of the missing objects for the requested traversal. + Object IDs are prefixed with a ``?'' character. The object type + is printed after the ID. This may be used with or without any of + the above filtering options. + +--filter-print-omitted:: + Only useful with one of the above `--filter*`; prints a list + of the omitted objects. Object IDs are prefixed with a ``~'' + character. endif::git-rev-list[] --no-walk[=(sorted|unsorted)]:: diff --git a/builtin/rev-list.c b/builtin/rev-list.c index c1c74d4a795643..eeb999bd469f22 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -12,6 +12,7 @@ #include "bisect.h" #include "progress.h" #include "reflog-walk.h" +#include "partial-clone-utils.h" static const char rev_list_usage[] = "git rev-list [OPTION] ... [ -- paths...
]\n" @@ -54,6 +55,11 @@ static const char rev_list_usage[] = static struct progress *progress; static unsigned progress_counter; +static struct list_objects_filter_options filter_options; +static struct list_objects_filter_map missing_objects; +static int arg_print_missing; +static int arg_print_omitted; +#define DEFAULT_MAP_SIZE (16*1024) static void finish_commit(struct commit *commit, void *data); static void show_commit(struct commit *commit, void *data) @@ -181,8 +187,26 @@ static void finish_commit(struct commit *commit, void *data) static void finish_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; - if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) + if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) { + if (arg_print_missing) { + list_objects_filter_map_insert( + &missing_objects, &obj->oid, name, obj->type); + return; + } + + /* + * Relax consistency checks when we expect missing + * objects because of partial-clone or a previous + * partial-fetch. + * + * Note that this is independent of any filtering that + * we are doing in this run. 
+ */ + if (is_partial_clone_registered()) + return; + die("missing blob object '%s'", oid_to_hex(&obj->oid)); + } if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT) parse_object(&obj->oid); } @@ -202,6 +226,22 @@ static void show_edge(struct commit *commit) printf("-%s\n", oid_to_hex(&commit->object.oid)); } +static void print_omitted_object(int i, int i_limit, struct list_objects_filter_map_entry *e, void *cb_data) +{ + /* struct rev_list_info *info = cb_data; */ + const char *tn = typename(e->type); + + printf("~%s %s\n", oid_to_hex(&e->entry.oid), tn); +} + +static void print_missing_object(int i, int i_limit, struct list_objects_filter_map_entry *e, void *cb_data) +{ + /* struct rev_list_info *info = cb_data; */ + const char *tn = typename(e->type); + + printf("?%s %s\n", oid_to_hex(&e->entry.oid), tn); +} + static void print_var_str(const char *var, const char *val) { printf("%s='%s'\n", var, val); @@ -335,6 +375,26 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) show_progress = arg; continue; } + + if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { + parse_list_objects_filter(&filter_options, arg); + if (filter_options.choice && !revs.blob_objects) + die(_("object filtering requires --objects")); + if (filter_options.choice == LOFC_SPARSE_OID && + !filter_options.sparse_oid_value) + die(_("invalid sparse value '%s'"), + filter_options.raw_value); + continue; + } + if (!strcmp(arg, "--filter-print-missing")) { + arg_print_missing = 1; + continue; + } + if (!strcmp(arg, "--filter-print-omitted")) { + arg_print_omitted = 1; + continue; + } + usage(rev_list_usage); } @@ -360,6 +420,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (revs.show_notes) die(_("rev-list does not support display of notes")); + if (filter_options.choice && use_bitmap_index) + die(_("cannot combine --use-bitmap-index with object filtering")); + save_commit_buffer = (revs.verbose_header || 
revs.grep_filter.pattern_list || revs.grep_filter.header_list); @@ -404,7 +467,25 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) return show_bisect_vars(&info, reaches, all); } - traverse_commit_list(&revs, show_commit, show_object, &info); + if (arg_print_missing) { + memset(&missing_objects, 0, sizeof(missing_objects)); + list_objects_filter_map_init(&missing_objects, + DEFAULT_MAP_SIZE); + } + + if (filter_options.choice) + traverse_commit_list_filtered(&filter_options, &revs, + show_commit, show_object, + (arg_print_omitted ? print_omitted_object : NULL), + &info); + else + traverse_commit_list(&revs, show_commit, show_object, &info); + + if (arg_print_missing) { + list_objects_filter_map_foreach(&missing_objects, + print_missing_object, &info); + list_objects_filter_map_clear(&missing_objects); + } stop_progress(&progress); From 5c04cf132c89721af59e872ffd8170c485beb99c Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 30 Jun 2017 15:34:06 -0400 Subject: [PATCH 11/24] t6112: rev-list object filtering test Signed-off-by: Jeff Hostetler --- t/t6112-rev-list-filters-objects.sh | 223 ++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100755 t/t6112-rev-list-filters-objects.sh diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh new file mode 100755 index 00000000000000..26fa12fed3d0ca --- /dev/null +++ b/t/t6112-rev-list-filters-objects.sh @@ -0,0 +1,223 @@ +#!/bin/sh + +test_description='git rev-list with object filtering for partial clone' + +. ./test-lib.sh + +# Test the blob:none filter. 
+ +test_expect_success 'setup r1' ' + echo "{print \$1}" >print_1.awk && + echo "{print \$2}" >print_2.awk && + + git init r1 && + for n in 1 2 3 4 5 + do + echo "This is file: $n" > r1/file.$n + git -C r1 add file.$n + git -C r1 commit -m "$n" + done +' + +test_expect_success 'verify blob:none omits all 5 blobs' ' + git -C r1 ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r1 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:none \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'verify emitted+omitted == all' ' + git -C r1 rev-list HEAD --objects \ + | awk -f print_1.awk \ + | sort >expected && + git -C r1 rev-list HEAD --objects --filter-print-omitted --filter=blob:none \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + + +# Test blob:limit=[kmg] filter. +# We boundary test around the size parameter. The filter is strictly less than +# the value, so size 500 and 1000 should have the same results, but 1001 should +# filter more. 
+ +test_expect_success 'setup r2' ' + git init r2 && + for n in 1000 10000 + do + printf "%"$n"s" X > r2/large.$n + git -C r2 add large.$n + git -C r2 commit -m "$n" + done +' + +test_expect_success 'verify blob:limit=500 omits all blobs' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=500 \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'verify emitted+omitted == all' ' + git -C r2 rev-list HEAD --objects \ + | awk -f print_1.awk \ + | sort >expected && + git -C r2 rev-list HEAD --objects --filter-print-omitted --filter=blob:limit=500 \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1000' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1000 \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1001' ' + git -C r2 ls-files -s large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1001 \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1k' ' + git -C r2 ls-files -s large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1k \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1m' ' + cat expected && + git -C r2 rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1m \ + | awk -f print_1.awk \ + | sed "s/~//" 
>observed && + test_cmp observed expected +' + +# Test sparse:path= filter. +# Use a local file containing a sparse-checkout specification to filter +# out blobs not required for the corresponding sparse-checkout. We do not +# require sparse-checkout to actually be enabled. + +test_expect_success 'setup r3' ' + git init r3 && + mkdir r3/dir1 && + for n in sparse1 sparse2 + do + echo "This is file: $n" > r3/$n + git -C r3 add $n + echo "This is file: dir1/$n" > r3/dir1/$n + git -C r3 add dir1/$n + done && + git -C r3 commit -m "sparse" && + echo dir1/ >pattern1 && + echo sparse1 >pattern2 +' + +test_expect_success 'verify sparse:path=pattern1 omits top-level files' ' + git -C r3 ls-files -s sparse1 sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=sparse:path=../pattern1 \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:path=pattern2 omits both sparse2 files' ' + git -C r3 ls-files -s sparse2 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=sparse:path=../pattern2 \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +# Test sparse:oid= filter. +# Like sparse:path, but we get the sparse-checkout specification from +# a blob rather than a file on disk. 
+ +test_expect_success 'setup r3 part 2' ' + echo dir1/ >r3/pattern && + git -C r3 add pattern && + git -C r3 commit -m "pattern" +' + +test_expect_success 'verify sparse:oid=OID omits top-level files' ' + git -C r3 ls-files -s pattern sparse1 sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + oid=$(git -C r3 ls-files -s pattern | awk -f print_2.awk) && + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=sparse:oid=$oid \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:oid=oid-ish omits top-level files' ' + git -C r3 ls-files -s pattern sparse1 sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 rev-list HEAD --quiet --objects --filter-print-omitted --filter=sparse:oid=master:pattern \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +# Delete some loose objects and use rev-list, but WITHOUT any filtering. +# This models previously omitted objects that we did not receive. 
+ +test_expect_success 'rev-list W/ print-missing' ' + git -C r1 ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + for id in `cat expected | sed "s|..|&/|"` + do + rm r1/.git/objects/$id + done && + git -C r1 rev-list --quiet HEAD --filter-print-missing --objects \ + | awk -f print_1.awk \ + | sed "s/?//" \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'rev-list W/O print-missing fails' ' + test_must_fail git -C r1 rev-list --quiet --objects HEAD +' + +test_expect_success 'rev-list W/ extension.partialcloneremote set succeeds' ' + git -C r1 config --local core.repositoryformatversion 1 && + git -C r1 config --local extensions.partialcloneremote "origin" && + git -C r1 rev-list --quiet --objects HEAD +' + +test_expect_success 'rev-list W/ extension.partialclonefilter set succeeds' ' + git -C r1 config --local core.repositoryformatversion 1 && + git -C r1 config --local extensions.partialclonefilter "something" && + git -C r1 rev-list --quiet --objects HEAD +' + +test_done From eb1ea18e62303f75592acf92d3ab628e61b799da Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 16:06:54 +0000 Subject: [PATCH 12/24] pack-objects: add list-objects filtering Teach pack-objects to use the filtering provided by the traverse_commit_list_filtered() interface to omit unwanted objects from the resulting packfile. This feature is intended for partial clone/fetch. Filtering requires the use of the "--stdout" option. 
Signed-off-by: Jeff Hostetler --- Documentation/git-pack-objects.txt | 8 +++++++- builtin/pack-objects.c | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 473a16135abf86..8b4a22319b9d19 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -12,7 +12,8 @@ SYNOPSIS 'git pack-objects' [-q | --progress | --all-progress] [--all-progress-implied] [--no-reuse-delta] [--delta-base-offset] [--non-empty] [--local] [--incremental] [--window=<n>] [--depth=<n>] - [--revs [--unpacked | --all]] [--stdout | base-name] + [--revs [--unpacked | --all]] + [--stdout [--filter=<filter-spec>] | base-name] [--shallow] [--keep-true-parents] < object-list @@ -236,6 +237,11 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle. With this option, parents that are hidden by grafts are packed nevertheless. +--filter=<filter-spec>:: + Requires `--stdout`. Omits certain objects (usually blobs) from + the resulting packfile. See linkgit:git-rev-list[1] for valid + `<filter-spec>` forms. 
+ SEE ALSO -------- linkgit:git-rev-list[1] diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6e77dfd44439f4..a25185063ebb2a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -79,6 +79,8 @@ static unsigned long cache_max_small_delta_size = 1000; static unsigned long window_memory_limit = 0; +static struct list_objects_filter_options filter_options; + /* * stats */ @@ -2816,7 +2818,12 @@ static void get_object_list(int ac, const char **av) if (prepare_revision_walk(&revs)) die("revision walk setup failed"); mark_edges_uninteresting(&revs, show_edge); - traverse_commit_list(&revs, show_commit, show_object, NULL); + if (filter_options.choice) + traverse_commit_list_filtered(&filter_options, &revs, + show_commit, show_object, + NULL, NULL); + else + traverse_commit_list(&revs, show_commit, show_object, NULL); if (unpack_unreachable_expiration) { revs.ignore_missing_links = 1; @@ -2952,6 +2959,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) N_("use a bitmap index if available to speed up counting objects")), OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index, N_("write a bitmap index together with the pack index")), + + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), + OPT_END(), }; @@ -3028,6 +3038,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; + if (filter_options.choice) { + if (!pack_to_stdout) + die("cannot use filtering with an indexable pack."); + use_bitmap_index = 0; + } + /* * "soft" reasons not to use bitmaps - for on-disk repack by default we want * From 9e8eccbd630d2d4034038a32dc8f99372ab3a762 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 16 Oct 2017 21:19:07 +0000 Subject: [PATCH 13/24] t5317: pack-objects object filtering test Signed-off-by: Jeff Hostetler --- t/t5317-pack-objects-filter-objects.sh | 384 +++++++++++++++++++++++++ 1 file changed, 384 
insertions(+) create mode 100755 t/t5317-pack-objects-filter-objects.sh diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh new file mode 100755 index 00000000000000..ef7a8f60e60b88 --- /dev/null +++ b/t/t5317-pack-objects-filter-objects.sh @@ -0,0 +1,384 @@ +#!/bin/sh + +test_description='git pack-objects with object filtering for partial clone' + +. ./test-lib.sh + +# Test blob:none filter. + +test_expect_success 'setup r1' ' + echo "{print \$1}" >print_1.awk && + echo "{print \$2}" >print_2.awk && + + git init r1 && + for n in 1 2 3 4 5 + do + echo "This is file: $n" > r1/file.$n + git -C r1 add file.$n + git -C r1 commit -m "$n" + done +' + +test_expect_success 'verify blob count in normal packfile' ' + git -C r1 ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r1 pack-objects --rev --stdout >all.pack <<-EOF && + HEAD + EOF + git -C r1 index-pack ../all.pack && + git -C r1 verify-pack -v ../all.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:none packfile has no blobs' ' + git -C r1 pack-objects --rev --stdout --filter=blob:none >filter.pack <<-EOF && + HEAD + EOF + git -C r1 index-pack ../filter.pack && + git -C r1 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + nr=$(wc -l expected && + git -C r1 verify-pack -v ../filter.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +# Test blob:limit=[kmg] filter. +# We boundary test around the size parameter. The filter is strictly less than +# the value, so size 500 and 1000 should have the same results, but 1001 should +# filter more. 
+ +test_expect_success 'setup r2' ' + git init r2 && + for n in 1000 10000 + do + printf "%"$n"s" X > r2/large.$n + git -C r2 add large.$n + git -C r2 commit -m "$n" + done +' + +test_expect_success 'verify blob count in normal packfile' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout >all.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../all.pack && + git -C r2 verify-pack -v ../all.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=500 omits all blobs' ' + git -C r2 pack-objects --rev --stdout --filter=blob:limit=500 >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + nr=$(wc -l filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + nr=$(wc -l expected && + git -C r2 pack-objects --rev --stdout --filter=blob:limit=1001 >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=10001' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout --filter=blob:limit=10001 >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1k' ' + git -C r2 ls-files -s large.1000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout 
--filter=blob:limit=1k >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify blob:limit=1m' ' + git -C r2 ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r2 pack-objects --rev --stdout --filter=blob:limit=1m >filter.pack <<-EOF && + HEAD + EOF + git -C r2 index-pack ../filter.pack && + git -C r2 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify normal and blob:limit packfiles have same commits/trees' ' + git -C r2 verify-pack -v ../all.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >expected && + git -C r2 verify-pack -v ../filter.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +# Test sparse:path= filter. +# Use a local file containing a sparse-checkout specification to filter +# out blobs not required for the corresponding sparse-checkout. We do not +# require sparse-checkout to actually be enabled. 
+ +test_expect_success 'setup r3' ' + git init r3 && + mkdir r3/dir1 && + for n in sparse1 sparse2 + do + echo "This is file: $n" > r3/$n + git -C r3 add $n + echo "This is file: dir1/$n" > r3/dir1/$n + git -C r3 add dir1/$n + done && + git -C r3 commit -m "sparse" && + echo dir1/ >pattern1 && + echo sparse1 >pattern2 +' + +test_expect_success 'verify blob count in normal packfile' ' + git -C r3 ls-files -s sparse1 sparse2 dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 pack-objects --rev --stdout >all.pack <<-EOF && + HEAD + EOF + git -C r3 index-pack ../all.pack && + git -C r3 verify-pack -v ../all.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:path=pattern1' ' + git -C r3 ls-files -s dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 pack-objects --rev --stdout --filter=sparse:path=../pattern1 >filter.pack <<-EOF && + HEAD + EOF + git -C r3 index-pack ../filter.pack && + git -C r3 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify normal and sparse:path=pattern1 packfiles have same commits/trees' ' + git -C r3 verify-pack -v ../all.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >expected && + git -C r3 verify-pack -v ../filter.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:path=pattern2' ' + git -C r3 ls-files -s sparse1 dir1/sparse1 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r3 pack-objects --rev --stdout --filter=sparse:path=../pattern2 >filter.pack <<-EOF && + HEAD + EOF + git -C r3 index-pack ../filter.pack && + git -C r3 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + 
+test_expect_success 'verify normal and sparse:path=pattern2 packfiles have same commits/trees' ' + git -C r3 verify-pack -v ../all.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >expected && + git -C r3 verify-pack -v ../filter.pack \ + | grep -E "commit|tree" \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +# Test sparse:oid= filter. +# Like sparse:path, but we get the sparse-checkout specification from +# a blob rather than a file on disk. + +test_expect_success 'setup r4' ' + git init r4 && + mkdir r4/dir1 && + for n in sparse1 sparse2 + do + echo "This is file: $n" > r4/$n + git -C r4 add $n + echo "This is file: dir1/$n" > r4/dir1/$n + git -C r4 add dir1/$n + done && + echo dir1/ >r4/pattern && + git -C r4 add pattern && + git -C r4 commit -m "pattern" +' + +test_expect_success 'verify blob count in normal packfile' ' + git -C r4 ls-files -s pattern sparse1 sparse2 dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r4 pack-objects --rev --stdout >all.pack <<-EOF && + HEAD + EOF + git -C r4 index-pack ../all.pack && + git -C r4 verify-pack -v ../all.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:oid=OID' ' + git -C r4 ls-files -s dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + oid=$(git -C r4 ls-files -s pattern | awk -f print_2.awk) && + git -C r4 pack-objects --rev --stdout --filter=sparse:oid=$oid >filter.pack <<-EOF && + HEAD + EOF + git -C r4 index-pack ../filter.pack && + git -C r4 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +test_expect_success 'verify sparse:oid=oid-ish' ' + git -C r4 ls-files -s dir1/sparse1 dir1/sparse2 \ + | awk -f print_2.awk \ + | sort >expected && + git -C r4 pack-objects --rev --stdout --filter=sparse:oid=master:pattern >filter.pack 
<<-EOF && + HEAD + EOF + git -C r4 index-pack ../filter.pack && + git -C r4 verify-pack -v ../filter.pack \ + | grep blob \ + | awk -f print_1.awk \ + | sort >observed && + test_cmp observed expected +' + +# Delete some loose objects and use pack-objects, but WITHOUT any filtering. +# This models previously omitted objects that we did not receive. + +test_expect_success 'setup r1 - delete loose blobs' ' + git -C r1 ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + for id in `cat expected | sed "s|..|&/|"` + do + rm r1/.git/objects/$id + done +' + +test_expect_success 'verify pack-objects fails w/ missing objects' ' + test_must_fail git -C r1 pack-objects --rev --stdout >miss.pack <<-EOF + HEAD + EOF +' + +if ! test_have_prereq TODO; then + skip_all='TODO Allow pack-objects to work with missing objects' + test_done +fi + +test_expect_success 'verify pack-objects w/ extension.partialcloneremote set succeeds' ' + git -C r1 config --local core.repositoryformatversion 1 && + git -C r1 config --local extensions.partialcloneremote "origin" && + git -C r1 pack-objects --rev --stdout >miss.pack <<-EOF + HEAD + EOF +' + +test_expect_success 'veify pack-objects w/ extension.partialclonefilter set succeeds' ' + git -C r1 config --local core.repositoryformatversion 1 && + git -C r1 config --local extensions.partialclonefilter "something" && + git -C r1 pack-objects --rev --stdout >miss.pack <<-EOF + HEAD + EOF +' + +test_done From 75222b315152958435741bb3de0ab60b5911ab65 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 18:10:19 +0000 Subject: [PATCH 14/24] pack-protocol: document list-objects filtering Signed-off-by: Jeff Hostetler --- Documentation/technical/pack-protocol.txt | 9 +++++++++ Documentation/technical/protocol-capabilities.txt | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt index 
ed1eae8b83a651..68e1400fe26e03 100644 --- a/Documentation/technical/pack-protocol.txt +++ b/Documentation/technical/pack-protocol.txt @@ -212,6 +212,7 @@ out of what the server said it could do with the first 'want' line. upload-request = want-list *shallow-line *1depth-request + [filter-request] flush-pkt want-list = first-want @@ -227,6 +228,8 @@ out of what the server said it could do with the first 'want' line. additional-want = PKT-LINE("want" SP obj-id) depth = 1*DIGIT + + filter-request = PKT-LINE("filter" SP "filter-spec") ---- Clients MUST send all the obj-ids it wants from the reference @@ -249,6 +252,12 @@ complete those commits. Commits whose parents are not received as a result are defined as shallow and marked as such in the server. This information is sent back to the client in the next step. +The client can optionally request that pack-objects omit various +objects from the packfile using one of several filtering techniques. +These are intended for use with partial clone/fetch operations. +The value of "filter-spec" is passed by upload-pack to pack-objects +using the `--filter=` parameter. + Once all the 'want's and 'shallow's (and optional 'deepen') are transferred, clients MUST send a flush-pkt, to tell the server side that it is done sending the list. diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt index 26dcc6f502020d..2b94679fc9f420 100644 --- a/Documentation/technical/protocol-capabilities.txt +++ b/Documentation/technical/protocol-capabilities.txt @@ -309,3 +309,11 @@ to accept a signed push certificate, and asks the to be included in the push certificate. A send-pack client MUST NOT send a push-cert packet unless the receive-pack server advertises this capability. 
+ +filter-objects +-------------- + +If the upload-pack server advertises the 'filter' capability, +fetch-pack may send a "filter <filter-spec>" command to request +a partial clone or fetch where the server omits various objects +from the packfile. From 200d6c92118cb6102147377593ce1509c81df1ef Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 19:22:12 +0000 Subject: [PATCH 15/24] upload-pack: add list-objects filtering Teach upload-pack to accept list-objects filtering parameters over the git protocol and pass them to pack-objects. Signed-off-by: Jeff Hostetler --- upload-pack.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/upload-pack.c b/upload-pack.c index e25f725c0feaa5..ff88ea17541002 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -18,6 +18,7 @@ #include "parse-options.h" #include "argv-array.h" #include "prio-queue.h" +#include "list-objects-filter-options.h" static const char * const upload_pack_usage[] = { N_("git upload-pack [<options>] <dir>"), @@ -64,6 +65,9 @@ static int advertise_refs; static int stateless_rpc; static const char *pack_objects_hook; +static int capability_filter_objects_requested; +static struct list_objects_filter_options filter_options; + static void reset_timeout(void) { alarm(timeout); @@ -132,6 +136,14 @@ static void create_pack_file(void) if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); + /* + * TODO Do we need to quote raw_value? 
+ */ + if (filter_options.choice) + argv_array_pushf(&pack_objects.args, "--%s=%s", + CL_ARG__FILTER, + filter_options.raw_value); + pack_objects.in = -1; pack_objects.out = -1; pack_objects.err = -1; @@ -794,6 +806,12 @@ static void receive_needs(void) deepen_rev_list = 1; continue; } + if (skip_prefix(line, (CL_ARG__FILTER " "), &arg)) { + parse_list_objects_filter(&filter_options, arg); + if (filter_options.choice && !capability_filter_objects_requested) + die("git upload-pack: filtering capability not negotiated"); + continue; + } if (!skip_prefix(line, "want ", &arg) || get_oid_hex(arg, &oid_buf)) die("git upload-pack: protocol error, " @@ -821,6 +839,8 @@ static void receive_needs(void) no_progress = 1; if (parse_feature_request(features, "include-tag")) use_include_tag = 1; + if (parse_feature_request(features, CL_ARG__FILTER)) + capability_filter_objects_requested = 1; o = parse_object(&oid_buf); if (!o) { @@ -929,7 +949,8 @@ static int send_ref(const char *refname, const struct object_id *oid, { static const char *capabilities = "multi_ack thin-pack side-band" " side-band-64k ofs-delta shallow deepen-since deepen-not" - " deepen-relative no-progress include-tag multi_ack_detailed"; + " deepen-relative no-progress include-tag multi_ack_detailed" + " " CL_ARG__FILTER; const char *refname_nons = strip_namespace(refname); struct object_id peeled; From e81a22d19d8f185ff5f15690d55d451fa1a4c80a Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 19:33:45 +0000 Subject: [PATCH 16/24] transport: add list-objects filtering Signed-off-by: Jeff Hostetler --- transport.c | 5 +++++ transport.h | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/transport.c b/transport.c index f1e2f61991424f..1629df35890b1e 100644 --- a/transport.c +++ b/transport.c @@ -161,6 +161,10 @@ static int set_git_option(struct git_transport_options *opts, opts->deepen_relative = !!value; return 0; } + else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) { + 
parse_list_objects_filter(&opts->filter_options, value); + return 0; + } return 1; } @@ -228,6 +232,7 @@ static int fetch_refs_via_pack(struct transport *transport, data->options.check_self_contained_and_connected; args.cloning = transport->cloning; args.update_shallow = data->options.update_shallow; + args.filter_options = data->options.filter_options; if (!data->got_remote_heads) { connect_setup(transport, 0); diff --git a/transport.h b/transport.h index bc5571574b6780..23e622b318dbfd 100644 --- a/transport.h +++ b/transport.h @@ -4,6 +4,7 @@ #include "cache.h" #include "run-command.h" #include "remote.h" +#include "list-objects-filter-options.h" struct string_list; @@ -21,6 +22,7 @@ struct git_transport_options { const char *uploadpack; const char *receivepack; struct push_cas_option *cas; + struct list_objects_filter_options filter_options; }; enum transport_family { @@ -210,6 +212,9 @@ void transport_check_allowed(const char *type); /* Send push certificates */ #define TRANS_OPT_PUSH_CERT "pushcert" +/* See Documentation/technical/pack-protocol.txt */ +#define TRANS_OPT_LIST_OBJECTS_FILTER CL_ARG__FILTER + /** * Returns 0 if the option was used, non-zero otherwise. Prints a * message to stderr if the option is not used. 
From 9d849849cb092ccb29c699abc45ada6472adf308 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 18 Oct 2017 19:23:18 +0000 Subject: [PATCH 17/24] transport-helper: add object filtering for partial clone Signed-off-by: Jeff Hostetler --- transport-helper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/transport-helper.c b/transport-helper.c index c948d5215c22fb..86a8abe97039dd 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -671,6 +671,11 @@ static int fetch(struct transport *transport, if (data->transport_options.update_shallow) set_helper_option(transport, "update-shallow", "true"); + if (data->transport_options.filter_options.choice) + set_helper_option( + transport, TRANS_OPT_LIST_OBJECTS_FILTER, + data->transport_options.filter_options.raw_value); + if (data->fetch) return fetch_with_fetch(transport, nr_heads, to_fetch); From 4d56a2068ed00d2332021e46861f8404c07a7f05 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 20:03:50 +0000 Subject: [PATCH 18/24] index-pack: respect extension.partialClone Teach index-pack to not complain about missing objects when extension.partialClone is set. 
Signed-off-by: Jeff Hostetler --- builtin/index-pack.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 8ec459f5225228..5930615eed30a7 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -13,6 +13,7 @@ #include "streaming.h" #include "thread-utils.h" #include "packfile.h" +#include "partial-clone-utils.h" static const char index_pack_usage[] = "git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])"; @@ -222,6 +223,17 @@ static unsigned check_object(struct object *obj) if (!(obj->flags & FLAG_CHECKED)) { unsigned long size; int type = sha1_object_info(obj->oid.hash, &size); + + if (type <= 0 && is_partial_clone_registered()) { + /* + * Relax consistency checks to not complain about + * missing objects (because of earlier partial + * clone or fetch). + */ + obj->flags |= FLAG_CHECKED; + return 0; + } + if (type <= 0) die(_("did not receive expected object %s"), oid_to_hex(&obj->oid)); From bf537f457520b35df2a2c74778553868fe7d80da Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 19:50:26 +0000 Subject: [PATCH 19/24] fetch-pack: add list-objects filtering Teach fetch-pack to accept and pass list-objects filtering parameters to upload-pack. 
Signed-off-by: Jeff Hostetler --- builtin/fetch-pack.c | 5 +++++ fetch-pack.c | 16 ++++++++++++++++ fetch-pack.h | 2 ++ 3 files changed, 23 insertions(+) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 366b9d13f929b7..0d0d9611b125cb 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -143,6 +143,11 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) args.update_shallow = 1; continue; } + if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { + parse_list_objects_filter(&args.filter_options, arg); + continue; + } + usage(fetch_pack_usage); } if (deepen_not.nr) diff --git a/fetch-pack.c b/fetch-pack.c index 008b25d3db0872..d76f08c55f760d 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -377,6 +377,8 @@ static int find_common(struct fetch_pack_args *args, if (prefer_ofs_delta) strbuf_addstr(&c, " ofs-delta"); if (deepen_since_ok) strbuf_addstr(&c, " deepen-since"); if (deepen_not_ok) strbuf_addstr(&c, " deepen-not"); + if (args->filter_options.choice) + strbuf_addstr(&c, (" " CL_ARG__FILTER)); if (agent_supported) strbuf_addf(&c, " agent=%s", git_user_agent_sanitized()); packet_buf_write(&req_buf, "want %s%s\n", remote_hex, c.buf); @@ -407,6 +409,14 @@ static int find_common(struct fetch_pack_args *args, packet_buf_write(&req_buf, "deepen-not %s", s->string); } } + + /* + * TODO Do we need to quote raw_value? 
+ */ + if (args->filter_options.choice) + packet_buf_write(&req_buf, (CL_ARG__FILTER " %s"), + args->filter_options.raw_value); + packet_buf_flush(&req_buf); state_len = req_buf.len; @@ -850,6 +860,7 @@ static int get_pack(struct fetch_pack_args *args, "--keep=fetch-pack %"PRIuMAX " on %s", (uintmax_t)getpid(), hostname); } + if (args->check_self_contained_and_connected) argv_array_push(&cmd.args, "--check-self-contained-and-connected"); } @@ -963,6 +974,11 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, else prefer_ofs_delta = 0; + if (server_supports(CL_ARG__FILTER)) + print_verbose(args, _("Server supports " CL_ARG__FILTER)); + else if (args->filter_options.choice) + die("Server does not support %s", CL_ARG__FILTER); + if ((agent_feature = server_feature_value("agent", &agent_len))) { agent_supported = 1; if (agent_len) diff --git a/fetch-pack.h b/fetch-pack.h index b6aeb43a8e2143..72690653489eac 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -3,6 +3,7 @@ #include "string-list.h" #include "run-command.h" +#include "list-objects-filter-options.h" struct oid_array; @@ -12,6 +13,7 @@ struct fetch_pack_args { int depth; const char *deepen_since; const struct string_list *deepen_not; + struct list_objects_filter_options filter_options; unsigned deepen_relative:1; unsigned quiet:1; unsigned keep_pack:1; From 8a645fcfe85d0b2cb37d73e9a160fb7b70451daa Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 20:38:50 +0000 Subject: [PATCH 20/24] clone: add list-objects filtering Signed-off-by: Jeff Hostetler --- builtin/clone.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/builtin/clone.c b/builtin/clone.c index dbddd98f80d666..7a869b15184573 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -26,6 +26,7 @@ #include "run-command.h" #include "connected.h" #include "packfile.h" +#include "list-objects-filter-options.h" /* * Overall FIXMEs: @@ -60,6 +61,7 @@ static struct string_list option_optional_reference = 
STRING_LIST_INIT_NODUP; static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; +static struct list_objects_filter_options filter_options; static int recurse_submodules_cb(const struct option *opt, const char *arg, int unset) @@ -135,6 +137,7 @@ static struct option builtin_clone_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -1073,6 +1076,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) warning(_("--shallow-since is ignored in local clones; use file:// instead.")); if (option_not.nr) warning(_("--shallow-exclude is ignored in local clones; use file:// instead.")); + if (filter_options.choice) + warning(_("Object filtering is ignored in local clones; use file:// instead.")); if (!access(mkpath("%s/shallow", path), F_OK)) { if (option_local > 0) warning(_("source repository is shallow, ignoring --local")); @@ -1104,6 +1109,10 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport_set_option(transport, TRANS_OPT_UPLOADPACK, option_upload_pack); + if (filter_options.choice) + transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.raw_value); + if (transport->smart_options && !deepen) transport->smart_options->check_self_contained_and_connected = 1; From 50d4e91faa7953489738483a1f6cee6f33e566dc Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 18 Oct 2017 17:13:02 +0000 Subject: [PATCH 21/24] clone: set extensions.partialclone during partial clone Teach clone to support setting extensions.partialclone and core.repositoryformatversion during a partial clone. 
Based upon a patch from Jonathan Tan Signed-off-by: Jeff Hostetler --- builtin/clone.c | 13 +++++++++---- remote.c | 2 ++ remote.h | 2 ++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 7a869b15184573..5c392575444572 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -26,7 +26,7 @@ #include "run-command.h" #include "connected.h" #include "packfile.h" -#include "list-objects-filter-options.h" +#include "partial-clone-utils.h" /* * Overall FIXMEs: @@ -1077,7 +1077,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (option_not.nr) warning(_("--shallow-exclude is ignored in local clones; use file:// instead.")); if (filter_options.choice) - warning(_("Object filtering is ignored in local clones; use file:// instead.")); + warning(_("Partial clone is ignored in local clones; use file:// instead.")); if (!access(mkpath("%s/shallow", path), F_OK)) { if (option_local > 0) warning(_("source repository is shallow, ignoring --local")); @@ -1113,7 +1113,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, filter_options.raw_value); - if (transport->smart_options && !deepen) + if (transport->smart_options && !deepen && !filter_options.choice) transport->smart_options->check_self_contained_and_connected = 1; refs = transport_get_remote_refs(transport); @@ -1173,13 +1173,18 @@ int cmd_clone(int argc, const char **argv, const char *prefix) write_refspec_config(src_ref_prefix, our_head_points_at, remote_head_points_at, &branch_top); + if (filter_options.choice) + partial_clone_utils_register(&filter_options, "origin", + "clone"); + if (is_local) clone_local(path, git_dir); else if (refs && complete_refs_before_fetch) transport_fetch_refs(transport, mapped_refs); update_remote_refs(refs, mapped_refs, remote_head_points_at, - branch_top.buf, reflog_msg.buf, transport, !is_local); + branch_top.buf, reflog_msg.buf, transport, + 
!is_local && !filter_options.choice); update_head(our_head_points_at, remote_head, reflog_msg.buf); diff --git a/remote.c b/remote.c index b220f0dfc619a6..e2d703e2e6bc80 100644 --- a/remote.c +++ b/remote.c @@ -440,6 +440,8 @@ static int handle_config(const char *key, const char *value, void *cb) key, value); } else if (!strcmp(subkey, "vcs")) { return git_config_string(&remote->foreign_vcs, key, value); + } else if (!strcmp(subkey, "partialclone")) { + return git_config_string(&remote->partial_clone, key, value); } return 0; } diff --git a/remote.h b/remote.h index 2ecf4c8c74ce59..0ce3d1dfbef4bf 100644 --- a/remote.h +++ b/remote.h @@ -56,6 +56,8 @@ struct remote { */ char *http_proxy; char *http_proxy_authmethod; + + const char *partial_clone; }; struct remote *remote_get(const char *name); From 09a1a77846a2c8f10f51dd0fef28cb0fcdec644f Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Thu, 28 Sep 2017 17:39:47 -0700 Subject: [PATCH 22/24] fetch: refactor calculation of remote list Separate out the calculation of remotes to be fetched from and the actual fetching. This will allow us to include an additional step before the actual fetching in a subsequent commit. 
Signed-off-by: Jonathan Tan --- builtin/fetch.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 225c734924f148..1b1f03923df471 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1322,7 +1322,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) { int i; struct string_list list = STRING_LIST_INIT_DUP; - struct remote *remote; + struct remote *remote = NULL; int result = 0; struct argv_array argv_gc_auto = ARGV_ARRAY_INIT; @@ -1367,17 +1367,14 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) else if (argc > 1) die(_("fetch --all does not make sense with refspecs")); (void) for_each_remote(get_one_remote_for_fetch, &list); - result = fetch_multiple(&list); } else if (argc == 0) { /* No arguments -- use default remote */ remote = remote_get(NULL); - result = fetch_one(remote, argc, argv); } else if (multiple) { /* All arguments are assumed to be remotes or groups */ for (i = 0; i < argc; i++) if (!add_remote_or_group(argv[i], &list)) die(_("No such remote or remote group: %s"), argv[i]); - result = fetch_multiple(&list); } else { /* Single remote or group */ (void) add_remote_or_group(argv[0], &list); @@ -1385,14 +1382,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) /* More than one remote */ if (argc > 1) die(_("Fetching a group and specifying refspecs does not make sense")); - result = fetch_multiple(&list); } else { /* Zero or one remotes */ remote = remote_get(argv[0]); - result = fetch_one(remote, argc-1, argv+1); + argc--; + argv++; } } + if (remote) + result = fetch_one(remote, argc, argv); + else + result = fetch_multiple(&list); + if (!result && (recurse_submodules != RECURSE_SUBMODULES_OFF)) { struct argv_array options = ARGV_ARRAY_INIT; From 56653cd3cd4b699de7e0bedf6ba87b5312c676db Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 20:57:24 +0000 Subject: [PATCH 23/24] fetch: add list-objects filtering parameters 
Signed-off-by: Jeff Hostetler --- builtin/fetch.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 1b1f03923df471..a30481184f9e8c 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -18,6 +18,7 @@ #include "argv-array.h" #include "utf8.h" #include "packfile.h" +#include "partial-clone-utils.h" static const char * const builtin_fetch_usage[] = { N_("git fetch [<options>] [<repository> [<refspec>...]]"), @@ -55,6 +56,7 @@ static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND; static int shown_url = 0; static int refmap_alloc, refmap_nr; static const char **refmap_array; +static struct list_objects_filter_options filter_options; static int git_fetch_config(const char *k, const char *v, void *cb) { @@ -160,6 +162,7 @@ static struct option builtin_fetch_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -754,6 +757,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, const char *filename = dry_run ? 
"/dev/null" : git_path_fetch_head(); int want_status; int summary_width = transport_summary_width(ref_map); + struct check_connected_options opt = CHECK_CONNECTED_INIT; fp = fopen(filename, "a"); if (!fp) @@ -765,7 +769,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, url = xstrdup("foreign"); rm = ref_map; - if (check_connected(iterate_ref_map, &rm, NULL)) { + if (check_connected(iterate_ref_map, &rm, &opt)) { rc = error(_("%s did not send all necessary objects\n"), url); goto abort; } @@ -1044,6 +1048,9 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes"); if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); + if (filter_options.choice) + set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.raw_value); return transport; } @@ -1242,6 +1249,20 @@ static int fetch_multiple(struct string_list *list) int i, result = 0; struct argv_array argv = ARGV_ARRAY_INIT; + if (filter_options.choice) { + /* + * We currently only support partial-fetches + * to the remote used for the partial-clone + * because we only support 1 promisor remote. + * + * Note that the loop below will spawn background + * fetches for each remote and one of them may + * INHERIT partial-fetch settings, so everything + * is consistent. + */ + die(_("partial-fetch is not supported on multiple remotes")); + } + if (!append && !dry_run) { int errcode = truncate_fetch_head(); if (errcode) @@ -1267,6 +1288,45 @@ static int fetch_multiple(struct string_list *list) return result; } +static inline void partial_fetch_one_setup(struct remote *remote) +{ + if (filter_options.choice) { + /* + * A partial-fetch was explicitly requested. + * + * If this is the first partial-* command on + * this repo, we must register the partial + * settings in the repository extension. 
+ * + * If this follows a previous partial-* command + * we must ensure the args are consistent with + * the existing registration (because we don't + * currently support mixing-and-matching). + */ + partial_clone_utils_register(&filter_options, + remote->name, "fetch"); + return; + } + + if (is_partial_clone_registered() && + !strcmp(remote->name, repository_format_partial_clone_remote)) { + /* + * If a partial-* command has already been used on + * this repo and it was to this remote, we should + * inherit the filter settings used previously. + * That is, if clone omitted very large blobs, then + * fetch should too. + * + * Use the cached filter-spec and create the filter + * settings. + */ + parse_list_objects_filter( + &filter_options, + repository_format_partial_clone_filter); + } +} + + static int fetch_one(struct remote *remote, int argc, const char **argv) { static const char **refs = NULL; @@ -1278,6 +1338,9 @@ static int fetch_one(struct remote *remote, int argc, const char **argv) die(_("No remote repository specified. 
Please, specify either a URL or a\n" "remote name from which new revisions should be fetched.")); + partial_fetch_one_setup(remote); + + gtransport = prepare_transport(remote, 1); if (prune < 0) { From 503fb507d1dcf6e050762cdf995199554dcf6468 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 18 Oct 2017 19:22:57 +0000 Subject: [PATCH 24/24] remote-curl: add object filtering for partial clone Signed-off-by: Jeff Hostetler --- remote-curl.c | 9 +++++++++ remote.c | 4 ++-- remote.h | 4 +++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/remote-curl.c b/remote-curl.c index 0053b09549ab41..44ceaaed734a31 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -13,6 +13,7 @@ #include "credential.h" #include "sha1-array.h" #include "send-pack.h" +#include "list-objects-filter-options.h" static struct remote *remote; /* always ends with a trailing slash */ @@ -22,6 +23,7 @@ struct options { int verbosity; unsigned long depth; char *deepen_since; + char *partial_clone_filter; struct string_list deepen_not; struct string_list push_options; unsigned progress : 1, @@ -157,6 +159,9 @@ static int set_option(const char *name, const char *value) return -1; return 0; #endif /* LIBCURL_VERSION_NUM >= 0x070a08 */ + } else if (!strcmp(name, REMOTE_KEY_PARTIAL_CLONE_FILTER)) { + options.partial_clone_filter = xstrdup(value); + return 0; } else { return 1 /* unsupported */; } @@ -822,6 +827,10 @@ static int fetch_git(struct discovery *heads, options.deepen_not.items[i].string); if (options.deepen_relative && options.depth) argv_array_push(&args, "--deepen-relative"); + if (options.partial_clone_filter) + argv_array_pushf(&args, "--%s=%s", + CL_ARG__FILTER, options.partial_clone_filter); + argv_array_push(&args, url.buf); for (i = 0; i < nr_heads; i++) { diff --git a/remote.c b/remote.c index e2d703e2e6bc80..6874c5fed293ee 100644 --- a/remote.c +++ b/remote.c @@ -440,8 +440,8 @@ static int handle_config(const char *key, const char *value, void *cb) key, value); } else if 
(!strcmp(subkey, "vcs")) { return git_config_string(&remote->foreign_vcs, key, value); - } else if (!strcmp(subkey, "partialclone")) { - return git_config_string(&remote->partial_clone, key, value); + } else if (!strcmp(subkey, REMOTE_KEY_PARTIAL_CLONE_FILTER)) { + return git_config_string(&remote->partial_clone_filter, key, value); } return 0; } diff --git a/remote.h b/remote.h index 0ce3d1dfbef4bf..7c2267fc872b4b 100644 --- a/remote.h +++ b/remote.h @@ -57,9 +57,11 @@ struct remote { char *http_proxy; char *http_proxy_authmethod; - const char *partial_clone; + const char *partial_clone_filter; }; +#define REMOTE_KEY_PARTIAL_CLONE_FILTER "partialclonefilter" + struct remote *remote_get(const char *name); struct remote *pushremote_get(const char *name); int remote_is_configured(struct remote *remote, int in_repo);