From 5ff25b8384e10b7eac6e6b83c23390b1789dac2b Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Tue, 24 Dec 2024 13:07:38 +0100 Subject: [PATCH 001/775] git-gui: po/README: update repository location and maintainer Signed-off-by: Johannes Sixt --- po/README | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/po/README b/po/README index 116233100d781a..ea26ea8dbd4f16 100644 --- a/po/README +++ b/po/README @@ -21,8 +21,8 @@ them. You would then need to clone the git-gui project repository and create a feature branch to begin working: - $ git clone git://repo.or.cz/git-gui.git - $ cd git-gui.git + $ git clone https://github.com/j6t/git-gui + $ cd git-gui $ git checkout -b my-translation The "git checkout" command creates a new branch to keep your work @@ -153,7 +153,7 @@ your patch series to the maintainer and the Git mailing list: $ git add po/af.po $ git commit -s -m 'git-gui: added Afrikaans translation.' $ git send-email --to 'git@vger.kernel.org' \ - --cc 'Pat Thoyts ' \ + --cc 'Johannes Sixt ' \ --subject 'git-gui: Afrikaans translation' \ master.. From cb3e368b6913a2446f245e7ae7a7e4ae4a2e6794 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:37 +0100 Subject: [PATCH 002/775] reftable/stack: stop using `read_in_full()` There is a single callsite of `read_in_full()` in the reftable library. Open-code the function to reduce our dependency on the Git library. Note that we only partially port over the logic from `read_in_full()` and its underlying `xread()` helper. Most importantly, the latter also knows to handle `EWOULDBLOCK` via `handle_nonblock()`. This logic is irrelevant for us though because the reftable library never sets the `O_NONBLOCK` option in the first place. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index 6c4e8be19b1050..1cc47b94d9c89e 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -115,13 +115,16 @@ int reftable_new_stack(struct reftable_stack **dest, const char *dir, static int fd_read_lines(int fd, char ***namesp) { - off_t size = lseek(fd, 0, SEEK_END); char *buf = NULL; int err = 0; + off_t size; + + size = lseek(fd, 0, SEEK_END); if (size < 0) { err = REFTABLE_IO_ERROR; goto done; } + err = lseek(fd, 0, SEEK_SET); if (err < 0) { err = REFTABLE_IO_ERROR; @@ -134,9 +137,16 @@ static int fd_read_lines(int fd, char ***namesp) goto done; } - if (read_in_full(fd, buf, size) != size) { - err = REFTABLE_IO_ERROR; - goto done; + for (off_t total_read = 0; total_read < size; ) { + ssize_t bytes_read = read(fd, buf + total_read, size - total_read); + if (bytes_read < 0 && (errno == EAGAIN || errno == EINTR)) + continue; + if (bytes_read < 0 || !bytes_read) { + err = REFTABLE_IO_ERROR; + goto done; + } + + total_read += bytes_read; } buf[size] = 0; From e31db89558b7f1d622e5a64cf37779b721099814 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:38 +0100 Subject: [PATCH 003/775] reftable/stack: stop using `write_in_full()` Similar to the preceding commit, drop our use of `write_in_full()` and implement a new wrapper `reftable_write_full()` that handles this logic for us. This is done to reduce our dependency on the Git library. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index 1cc47b94d9c89e..c33f0c3333accd 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -48,6 +48,25 @@ static int stack_fsync(const struct reftable_write_options *opts, int fd) return fsync(fd); } +static ssize_t reftable_write_data(int fd, const void *data, size_t size) +{ + size_t total_written = 0; + const char *p = data; + + while (total_written < size) { + ssize_t bytes_written = write(fd, p, size - total_written); + if (bytes_written < 0 && (errno == EAGAIN || errno == EINTR)) + continue; + if (bytes_written < 0) + return REFTABLE_IO_ERROR; + + total_written += bytes_written; + p += bytes_written; + } + + return total_written; +} + struct fd_writer { const struct reftable_write_options *opts; int fd; @@ -56,7 +75,7 @@ struct fd_writer { static ssize_t fd_writer_write(void *arg, const void *data, size_t sz) { struct fd_writer *writer = arg; - return write_in_full(writer->fd, data, sz); + return reftable_write_data(writer->fd, data, sz); } static int fd_writer_flush(void *arg) @@ -784,7 +803,8 @@ int reftable_addition_commit(struct reftable_addition *add) goto done; } - err = write_in_full(add->tables_list_lock.fd, table_list.buf, table_list.len); + err = reftable_write_data(add->tables_list_lock.fd, + table_list.buf, table_list.len); reftable_buf_release(&table_list); if (err < 0) { err = REFTABLE_IO_ERROR; @@ -1470,8 +1490,8 @@ static int stack_compact_range(struct reftable_stack *st, goto done; } - err = write_in_full(tables_list_lock.fd, - tables_list_buf.buf, tables_list_buf.len); + err = reftable_write_data(tables_list_lock.fd, + tables_list_buf.buf, tables_list_buf.len); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); From 70afa6fa318db517adb0cc7d4cb66260061bb684 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt 
Date: Tue, 18 Feb 2025 10:20:39 +0100 Subject: [PATCH 004/775] reftable/blocksource: stop using `xmmap()` We use `xmmap()` to map reftables into memory. This function has two problems: - It causes us to die in case the mmap fails. - It ties us to the Git codebase. Refactor the code to use mmap(3p) instead with manual error checking. Note that this function may not be the system-provided mmap(3p), but may point to our `git_mmap()` wrapper that emulates the syscall on systems that do not have mmap(3p) available. Fix `reftable_block_source_from_file()` to properly bubble up the error code in case the mmap(3p) call fails. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/blocksource.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/reftable/blocksource.c b/reftable/blocksource.c index bba4a45b98ab04..02972c46f4b6c2 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -98,7 +98,7 @@ static struct reftable_block_source_vtable file_vtable = { int reftable_block_source_from_file(struct reftable_block_source *bs, const char *name) { - struct file_block_source *p; + struct file_block_source *p = NULL; struct stat st; int fd, err; @@ -122,7 +122,12 @@ int reftable_block_source_from_file(struct reftable_block_source *bs, } p->size = st.st_size; - p->data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + p->data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (p->data == MAP_FAILED) { + err = REFTABLE_IO_ERROR; + p->data = NULL; + goto out; + } assert(!bs->ops); bs->ops = &file_vtable; @@ -135,5 +140,5 @@ int reftable_block_source_from_file(struct reftable_block_source *bs, close(fd); if (err < 0) reftable_free(p); - return 0; + return err; } From a967966432f25324c79524c0cb18d6e152d0b6af Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:40 +0100 Subject: [PATCH 005/775] reftable/record: stop using `COPY_ARRAY()` Drop our use of `COPY_ARRAY()`, replacing it 
with an open-coded variant thereof. This is done to reduce our dependency on the Git library. While at it, guard the whole array copy logic so that we only copy it in case there actually is anything to be copied. Otherwise, we may end up trying to allocate a zero-sized array, which will return a NULL pointer and thus cause us to return an `REFTABLE_OUT_OF_MEMORY_ERROR`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/record.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/reftable/record.c b/reftable/record.c index 8919df8a4d3e3d..2c0cc32cbd35f8 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -504,11 +504,17 @@ static int reftable_obj_record_copy_from(void *rec, const void *src_rec, if (src->hash_prefix_len) memcpy(obj->hash_prefix, src->hash_prefix, obj->hash_prefix_len); - REFTABLE_ALLOC_ARRAY(obj->offsets, src->offset_len); - if (!obj->offsets) - return REFTABLE_OUT_OF_MEMORY_ERROR; - obj->offset_len = src->offset_len; - COPY_ARRAY(obj->offsets, src->offsets, src->offset_len); + if (src->offset_len) { + if (sizeof(*src->offsets) > SIZE_MAX / src->offset_len) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + REFTABLE_ALLOC_ARRAY(obj->offsets, src->offset_len); + if (!obj->offsets) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + memcpy(obj->offsets, src->offsets, sizeof(*src->offsets) * src->offset_len); + obj->offset_len = src->offset_len; + } return 0; } From 9d9fac0f34ec47cc6eafeb3e10378ab8f3310346 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:41 +0100 Subject: [PATCH 006/775] reftable/record: stop using `BUG()` in `reftable_record_init()` We're aborting the program via `BUG()` in case `reftable_record_init()` was invoked with an unknown record type. This is bad because we may now die in library code, and because it makes us depend on the Git codebase. Refactor the code such that `reftable_record_init()` can return an error code to the caller. 
Adapt any callers accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/block.c | 4 +++- reftable/merged.c | 5 ++++- reftable/reader.c | 5 ++++- reftable/record.c | 8 ++++---- reftable/record.h | 4 ++-- t/unit-tests/t-reftable-pq.c | 6 +++--- t/unit-tests/t-reftable-record.c | 2 +- 7 files changed, 21 insertions(+), 13 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index b14a8f1259aef1..999876826d1fd7 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -508,7 +508,9 @@ int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, it->block_len = br->block_len; it->hash_size = br->hash_size; - reftable_record_init(&rec, block_reader_type(br)); + err = reftable_record_init(&rec, block_reader_type(br)); + if (err < 0) + goto done; /* * We're looking for the last entry less than the wanted key so that diff --git a/reftable/merged.c b/reftable/merged.c index e72b39e178d4de..4156eec07fc904 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -253,7 +253,10 @@ int merged_table_init_iter(struct reftable_merged_table *mt, } for (size_t i = 0; i < mt->readers_len; i++) { - reftable_record_init(&subiters[i].rec, typ); + ret = reftable_record_init(&subiters[i].rec, typ); + if (ret < 0) + goto out; + ret = reader_init_iter(mt->readers[i], &subiters[i].iter, typ); if (ret < 0) goto out; diff --git a/reftable/reader.c b/reftable/reader.c index 3f2e4b280055ff..de6e6dd93220a7 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -360,7 +360,10 @@ static int table_iter_seek_linear(struct table_iter *ti, struct reftable_record rec; int err; - reftable_record_init(&rec, reftable_record_type(want)); + err = reftable_record_init(&rec, reftable_record_type(want)); + if (err < 0) + goto done; + err = reftable_record_key(want, &want_key); if (err < 0) goto done; diff --git a/reftable/record.c b/reftable/record.c index 2c0cc32cbd35f8..1e18f8dffb1300 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ 
-1306,7 +1306,7 @@ reftable_record_vtable(struct reftable_record *rec) abort(); } -void reftable_record_init(struct reftable_record *rec, uint8_t typ) +int reftable_record_init(struct reftable_record *rec, uint8_t typ) { memset(rec, 0, sizeof(*rec)); rec->type = typ; @@ -1315,11 +1315,11 @@ void reftable_record_init(struct reftable_record *rec, uint8_t typ) case BLOCK_TYPE_REF: case BLOCK_TYPE_LOG: case BLOCK_TYPE_OBJ: - return; + return 0; case BLOCK_TYPE_INDEX: reftable_buf_init(&rec->u.idx.last_key); - return; + return 0; default: - BUG("unhandled record type"); + return REFTABLE_API_ERROR; } } diff --git a/reftable/record.h b/reftable/record.h index c7755a4d750339..e1846c294ba0ba 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -130,8 +130,8 @@ struct reftable_record { } u; }; -/* Initialize the reftable record for the given type */ -void reftable_record_init(struct reftable_record *rec, uint8_t typ); +/* Initialize the reftable record for the given type. */ +int reftable_record_init(struct reftable_record *rec, uint8_t typ); /* see struct record_vtable */ int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); diff --git a/t/unit-tests/t-reftable-pq.c b/t/unit-tests/t-reftable-pq.c index f3f8a0cdf38579..d8a4c283a11701 100644 --- a/t/unit-tests/t-reftable-pq.c +++ b/t/unit-tests/t-reftable-pq.c @@ -32,7 +32,7 @@ static void t_pq_record(void) char *last = NULL; for (i = 0; i < N; i++) { - reftable_record_init(&recs[i], BLOCK_TYPE_REF); + check(!reftable_record_init(&recs[i], BLOCK_TYPE_REF)); recs[i].u.ref.refname = xstrfmt("%02"PRIuMAX, (uintmax_t)i); } @@ -72,7 +72,7 @@ static void t_pq_index(void) size_t N = ARRAY_SIZE(recs), i; for (i = 0; i < N; i++) { - reftable_record_init(&recs[i], BLOCK_TYPE_REF); + check(!reftable_record_init(&recs[i], BLOCK_TYPE_REF)); recs[i].u.ref.refname = (char *) "refs/heads/master"; } @@ -111,7 +111,7 @@ static void t_merged_iter_pqueue_top(void) size_t N = ARRAY_SIZE(recs), i; for (i = 0; i < N; 
i++) { - reftable_record_init(&recs[i], BLOCK_TYPE_REF); + check(!reftable_record_init(&recs[i], BLOCK_TYPE_REF)); recs[i].u.ref.refname = (char *) "refs/heads/master"; } diff --git a/t/unit-tests/t-reftable-record.c b/t/unit-tests/t-reftable-record.c index d49d2a2729cb17..6540bd20e37f14 100644 --- a/t/unit-tests/t-reftable-record.c +++ b/t/unit-tests/t-reftable-record.c @@ -17,7 +17,7 @@ static void t_copy(struct reftable_record *rec) uint8_t typ; typ = reftable_record_type(rec); - reftable_record_init(&copy, typ); + check(!reftable_record_init(&copy, typ)); reftable_record_copy_from(&copy, rec, REFTABLE_HASH_SIZE_SHA1); /* do it twice to catch memory leaks */ reftable_record_copy_from(&copy, rec, REFTABLE_HASH_SIZE_SHA1); From 6f6127decde6785b9ba5f22a07a7754d1fda1a59 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:42 +0100 Subject: [PATCH 007/775] reftable/record: don't `BUG()` in `reftable_record_cmp()` The reftable library aborts with a bug in case `reftable_record_cmp()` is invoked with two records of differing types. This would cause the program to die without the caller being able to handle the error, which is not something we want in the context of library code. And it ties us to the Git codebase. Refactor the code such that `reftable_record_cmp()` returns an error code separate from the actual comparison result. This requires us to also adapt some callers up the callchain in a similar fashion. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 20 ++++++++++++---- reftable/pq.c | 36 ++++++++++++++++++++++------ reftable/pq.h | 2 +- reftable/record.c | 10 ++++---- reftable/record.h | 2 +- t/unit-tests/t-reftable-pq.c | 16 +++++++++---- t/unit-tests/t-reftable-record.c | 40 ++++++++++++++++++++++---------- 7 files changed, 92 insertions(+), 34 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index 4156eec07fc904..563864068c1882 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -66,8 +66,11 @@ static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want int err; mi->advance_index = -1; - while (!merged_iter_pqueue_is_empty(mi->pq)) - merged_iter_pqueue_remove(&mi->pq); + while (!merged_iter_pqueue_is_empty(mi->pq)) { + err = merged_iter_pqueue_remove(&mi->pq, NULL); + if (err < 0) + return err; + } for (size_t i = 0; i < mi->subiters_len; i++) { err = iterator_seek(&mi->subiters[i].iter, want); @@ -120,7 +123,9 @@ static int merged_iter_next_entry(struct merged_iter *mi, if (empty) return 1; - entry = merged_iter_pqueue_remove(&mi->pq); + err = merged_iter_pqueue_remove(&mi->pq, &entry); + if (err < 0) + return err; /* One can also use reftable as datacenter-local storage, where the ref @@ -134,11 +139,16 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct pq_entry top = merged_iter_pqueue_top(mi->pq); int cmp; - cmp = reftable_record_cmp(top.rec, entry.rec); + err = reftable_record_cmp(top.rec, entry.rec, &cmp); + if (err < 0) + return err; if (cmp > 0) break; - merged_iter_pqueue_remove(&mi->pq); + err = merged_iter_pqueue_remove(&mi->pq, NULL); + if (err < 0) + return err; + err = merged_iter_advance_subiter(mi, top.index); if (err < 0) return err; diff --git a/reftable/pq.c b/reftable/pq.c index 5591e875e1e845..ef8035cfd9b692 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -15,13 +15,18 @@ license that can be found in the LICENSE file or at int 
pq_less(struct pq_entry *a, struct pq_entry *b) { - int cmp = reftable_record_cmp(a->rec, b->rec); + int cmp, err; + + err = reftable_record_cmp(a->rec, b->rec, &cmp); + if (err < 0) + return err; + if (cmp == 0) return a->index > b->index; return cmp < 0; } -struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) +int merged_iter_pqueue_remove(struct merged_iter_pqueue *pq, struct pq_entry *out) { size_t i = 0; struct pq_entry e = pq->heap[0]; @@ -32,17 +37,34 @@ struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) size_t min = i; size_t j = 2 * i + 1; size_t k = 2 * i + 2; - if (j < pq->len && pq_less(&pq->heap[j], &pq->heap[i])) - min = j; - if (k < pq->len && pq_less(&pq->heap[k], &pq->heap[min])) - min = k; + int cmp; + + if (j < pq->len) { + cmp = pq_less(&pq->heap[j], &pq->heap[i]); + if (cmp < 0) + return -1; + else if (cmp) + min = j; + } + + if (k < pq->len) { + cmp = pq_less(&pq->heap[k], &pq->heap[min]); + if (cmp < 0) + return -1; + else if (cmp) + min = k; + } + if (min == i) break; SWAP(pq->heap[i], pq->heap[min]); i = min; } - return e; + if (out) + *out = e; + + return 0; } int merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e) diff --git a/reftable/pq.h b/reftable/pq.h index 83c062eecac9f2..ff39016445b3f7 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -22,7 +22,7 @@ struct merged_iter_pqueue { size_t cap; }; -struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq); +int merged_iter_pqueue_remove(struct merged_iter_pqueue *pq, struct pq_entry *out); int merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); void merged_iter_pqueue_release(struct merged_iter_pqueue *pq); int pq_less(struct pq_entry *a, struct pq_entry *b); diff --git a/reftable/record.c b/reftable/record.c index 1e18f8dffb1300..b39d99fcc75a66 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -1195,12 +1195,14 @@ int reftable_record_is_deletion(struct 
reftable_record *rec) reftable_record_data(rec)); } -int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b, + int *cmp) { if (a->type != b->type) - BUG("cannot compare reftable records of different type"); - return reftable_record_vtable(a)->cmp( - reftable_record_data(a), reftable_record_data(b)); + return -1; + *cmp = reftable_record_vtable(a)->cmp(reftable_record_data(a), + reftable_record_data(b)); + return 0; } int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, uint32_t hash_size) diff --git a/reftable/record.h b/reftable/record.h index e1846c294ba0ba..867810a9328218 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -134,7 +134,7 @@ struct reftable_record { int reftable_record_init(struct reftable_record *rec, uint8_t typ); /* see struct record_vtable */ -int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b, int *cmp); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, uint32_t hash_size); int reftable_record_key(struct reftable_record *rec, struct reftable_buf *dest); int reftable_record_copy_from(struct reftable_record *rec, diff --git a/t/unit-tests/t-reftable-pq.c b/t/unit-tests/t-reftable-pq.c index d8a4c283a11701..c128fe8616a604 100644 --- a/t/unit-tests/t-reftable-pq.c +++ b/t/unit-tests/t-reftable-pq.c @@ -21,7 +21,9 @@ static void merged_iter_pqueue_check(const struct merged_iter_pqueue *pq) static int pq_entry_equal(struct pq_entry *a, struct pq_entry *b) { - return !reftable_record_cmp(a->rec, b->rec) && (a->index == b->index); + int cmp; + check(!reftable_record_cmp(a->rec, b->rec, &cmp)); + return !cmp && (a->index == b->index); } static void t_pq_record(void) @@ -49,7 +51,9 @@ static void t_pq_record(void) while (!merged_iter_pqueue_is_empty(pq)) { struct pq_entry top = 
merged_iter_pqueue_top(pq); - struct pq_entry e = merged_iter_pqueue_remove(&pq); + struct pq_entry e; + + check(!merged_iter_pqueue_remove(&pq, &e)); merged_iter_pqueue_check(&pq); check(pq_entry_equal(&top, &e)); @@ -90,7 +94,9 @@ static void t_pq_index(void) for (i = N - 1; i > 0; i--) { struct pq_entry top = merged_iter_pqueue_top(pq); - struct pq_entry e = merged_iter_pqueue_remove(&pq); + struct pq_entry e; + + check(!merged_iter_pqueue_remove(&pq, &e)); merged_iter_pqueue_check(&pq); check(pq_entry_equal(&top, &e)); @@ -129,7 +135,9 @@ static void t_merged_iter_pqueue_top(void) for (i = N - 1; i > 0; i--) { struct pq_entry top = merged_iter_pqueue_top(pq); - struct pq_entry e = merged_iter_pqueue_remove(&pq); + struct pq_entry e; + + check(!merged_iter_pqueue_remove(&pq, &e)); merged_iter_pqueue_check(&pq); check(pq_entry_equal(&top, &e)); diff --git a/t/unit-tests/t-reftable-record.c b/t/unit-tests/t-reftable-record.c index 6540bd20e37f14..595496637364a1 100644 --- a/t/unit-tests/t-reftable-record.c +++ b/t/unit-tests/t-reftable-record.c @@ -100,16 +100,20 @@ static void t_reftable_ref_record_comparison(void) .u.ref.value.symref = (char *) "refs/heads/master", }, }; + int cmp; check(!reftable_record_equal(&in[0], &in[1], REFTABLE_HASH_SIZE_SHA1)); - check(!reftable_record_cmp(&in[0], &in[1])); + check(!reftable_record_cmp(&in[0], &in[1], &cmp)); + check(!cmp); check(!reftable_record_equal(&in[1], &in[2], REFTABLE_HASH_SIZE_SHA1)); - check_int(reftable_record_cmp(&in[1], &in[2]), >, 0); + check(!reftable_record_cmp(&in[1], &in[2], &cmp)); + check_int(cmp, >, 0); in[1].u.ref.value_type = in[0].u.ref.value_type; check(reftable_record_equal(&in[0], &in[1], REFTABLE_HASH_SIZE_SHA1)); - check(!reftable_record_cmp(&in[0], &in[1])); + check(!reftable_record_cmp(&in[0], &in[1], &cmp)); + check(!cmp); } static void t_reftable_ref_record_compare_name(void) @@ -209,17 +213,20 @@ static void t_reftable_log_record_comparison(void) .u.log.update_index = 22, }, }; + int 
cmp; check(!reftable_record_equal(&in[0], &in[1], REFTABLE_HASH_SIZE_SHA1)); check(!reftable_record_equal(&in[1], &in[2], REFTABLE_HASH_SIZE_SHA1)); - check_int(reftable_record_cmp(&in[1], &in[2]), >, 0); + check(!reftable_record_cmp(&in[1], &in[2], &cmp)); + check_int(cmp, >, 0); /* comparison should be reversed for equal keys, because * comparison is now performed on the basis of update indices */ - check_int(reftable_record_cmp(&in[0], &in[1]), <, 0); + check(!reftable_record_cmp(&in[0], &in[1], &cmp)); + check_int(cmp, <, 0); in[1].u.log.update_index = in[0].u.log.update_index; check(reftable_record_equal(&in[0], &in[1], REFTABLE_HASH_SIZE_SHA1)); - check(!reftable_record_cmp(&in[0], &in[1])); + check(!reftable_record_cmp(&in[0], &in[1], &cmp)); } static void t_reftable_log_record_compare_key(void) @@ -396,16 +403,20 @@ static void t_reftable_obj_record_comparison(void) .u.obj.hash_prefix_len = 5, }, }; + int cmp; check(!reftable_record_equal(&in[0], &in[1], REFTABLE_HASH_SIZE_SHA1)); - check(!reftable_record_cmp(&in[0], &in[1])); + check(!reftable_record_cmp(&in[0], &in[1], &cmp)); + check(!cmp); check(!reftable_record_equal(&in[1], &in[2], REFTABLE_HASH_SIZE_SHA1)); - check_int(reftable_record_cmp(&in[1], &in[2]), >, 0); + check(!reftable_record_cmp(&in[1], &in[2], &cmp)); + check_int(cmp, >, 0); in[1].u.obj.offset_len = in[0].u.obj.offset_len; check(reftable_record_equal(&in[0], &in[1], REFTABLE_HASH_SIZE_SHA1)); - check(!reftable_record_cmp(&in[0], &in[1])); + check(!reftable_record_cmp(&in[0], &in[1], &cmp)); + check(!cmp); } static void t_reftable_obj_record_roundtrip(void) @@ -486,19 +497,24 @@ static void t_reftable_index_record_comparison(void) .u.idx.last_key = REFTABLE_BUF_INIT, }, }; + int cmp; + check(!reftable_buf_addstr(&in[0].u.idx.last_key, "refs/heads/master")); check(!reftable_buf_addstr(&in[1].u.idx.last_key, "refs/heads/master")); check(!reftable_buf_addstr(&in[2].u.idx.last_key, "refs/heads/branch")); check(!reftable_record_equal(&in[0], 
&in[1], REFTABLE_HASH_SIZE_SHA1)); - check(!reftable_record_cmp(&in[0], &in[1])); + check(!reftable_record_cmp(&in[0], &in[1], &cmp)); + check(!cmp); check(!reftable_record_equal(&in[1], &in[2], REFTABLE_HASH_SIZE_SHA1)); - check_int(reftable_record_cmp(&in[1], &in[2]), >, 0); + check(!reftable_record_cmp(&in[1], &in[2], &cmp)); + check_int(cmp, >, 0); in[1].u.idx.offset = in[0].u.idx.offset; check(reftable_record_equal(&in[0], &in[1], REFTABLE_HASH_SIZE_SHA1)); - check(!reftable_record_cmp(&in[0], &in[1])); + check(!reftable_record_cmp(&in[0], &in[1], &cmp)); + check(!cmp); for (size_t i = 0; i < ARRAY_SIZE(in); i++) reftable_record_release(&in[i]); From 445f9f4f35c663fb668425f8c8fe0a1d58e1d8c7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:43 +0100 Subject: [PATCH 008/775] reftable: stop using `BUG()` in trivial cases Stop using `BUG()` in the remaining trivial cases that we still have in the reftable library. Instead of aborting the program, we'll now bubble up a `REFTABLE_API_ERROR` to indicate misuse of the calling conventions. Note that in both `reftable_reader_{inc,dec}ref()` we simply stop calling `BUG()` altogether. The only situation where the counter should be zero is when the structure has already been free'd anyway, so we would run into undefined behaviour regardless of whether we try to abort the program or not. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/iter.c | 3 +-- reftable/reader.c | 4 ---- reftable/writer.c | 5 ++--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/reftable/iter.c b/reftable/iter.c index 86e801ca9fbc6b..b2ffb09c16bc4d 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -146,8 +146,7 @@ static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) static int indexed_table_ref_iter_seek(void *p UNUSED, struct reftable_record *want UNUSED) { - BUG("seeking indexed table is not supported"); - return -1; + return REFTABLE_API_ERROR; } static int indexed_table_ref_iter_next(void *p, struct reftable_record *rec) diff --git a/reftable/reader.c b/reftable/reader.c index de6e6dd93220a7..36a5633eded9f3 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -677,8 +677,6 @@ int reftable_reader_new(struct reftable_reader **out, void reftable_reader_incref(struct reftable_reader *r) { - if (!r->refcount) - BUG("cannot increment ref counter of dead reader"); r->refcount++; } @@ -686,8 +684,6 @@ void reftable_reader_decref(struct reftable_reader *r) { if (!r) return; - if (!r->refcount) - BUG("cannot decrement ref counter of dead reader"); if (--r->refcount) return; block_source_close(&r->source); diff --git a/reftable/writer.c b/reftable/writer.c index f3ab1035d61d96..239573ade24506 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -158,7 +158,7 @@ int reftable_writer_new(struct reftable_writer **out, opts = *_opts; options_set_defaults(&opts); if (opts.block_size >= (1 << 24)) - BUG("configured block size exceeds 16MB"); + return REFTABLE_API_ERROR; reftable_buf_init(&wp->block_writer_data.last_key); reftable_buf_init(&wp->last_key); @@ -302,8 +302,7 @@ static int writer_add_record(struct reftable_writer *w, } if (block_writer_type(w->block_writer) != reftable_record_type(rec)) - BUG("record of type %d added to writer of type %d", - reftable_record_type(rec), 
block_writer_type(w->block_writer)); + return REFTABLE_API_ERROR; /* * Try to add the record to the writer. If this succeeds then we're From 6e3ea71639a3541523f2be8f950325c8a0aa6768 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:44 +0100 Subject: [PATCH 009/775] reftable/basics: stop using `st_mult()` in array allocators We're using `st_mult()` as part of our macro helpers that allocate arrays. This is bad due two two reasons: - `st_mult()` causes us to die in case the multiplication overflows. - `st_mult()` ties us to the Git codebase. Refactor the code to instead detect overflows manually and return an error in such cases. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.h | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/reftable/basics.h b/reftable/basics.h index a2a010a0e199ba..646f8d67f2d4de 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -117,18 +117,46 @@ void reftable_free(void *p); void *reftable_calloc(size_t nelem, size_t elsize); char *reftable_strdup(const char *str); -#define REFTABLE_ALLOC_ARRAY(x, alloc) (x) = reftable_malloc(st_mult(sizeof(*(x)), (alloc))) +static inline int reftable_alloc_size(size_t nelem, size_t elsize, size_t *out) +{ + if (nelem && elsize > SIZE_MAX / nelem) + return -1; + *out = nelem * elsize; + return 0; +} + +#define REFTABLE_ALLOC_ARRAY(x, alloc) do { \ + size_t alloc_size; \ + if (reftable_alloc_size(sizeof(*(x)), (alloc), &alloc_size) < 0) { \ + errno = ENOMEM; \ + (x) = NULL; \ + } else { \ + (x) = reftable_malloc(alloc_size); \ + } \ + } while (0) #define REFTABLE_CALLOC_ARRAY(x, alloc) (x) = reftable_calloc((alloc), sizeof(*(x))) -#define REFTABLE_REALLOC_ARRAY(x, alloc) (x) = reftable_realloc((x), st_mult(sizeof(*(x)), (alloc))) +#define REFTABLE_REALLOC_ARRAY(x, alloc) do { \ + size_t alloc_size; \ + if (reftable_alloc_size(sizeof(*(x)), (alloc), &alloc_size) < 0) { \ + errno = ENOMEM; 
\ + (x) = NULL; \ + } else { \ + (x) = reftable_realloc((x), alloc_size); \ + } \ + } while (0) static inline void *reftable_alloc_grow(void *p, size_t nelem, size_t elsize, size_t *allocp) { void *new_p; - size_t alloc = *allocp * 2 + 1; + size_t alloc = *allocp * 2 + 1, alloc_bytes; if (alloc < nelem) alloc = nelem; - new_p = reftable_realloc(p, st_mult(elsize, alloc)); + if (reftable_alloc_size(elsize, alloc, &alloc_bytes) < 0) { + errno = ENOMEM; + return p; + } + new_p = reftable_realloc(p, alloc_bytes); if (!new_p) return p; *allocp = alloc; From e676694298c4a8d9f6fdf3844cbfb03bbee552cc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:45 +0100 Subject: [PATCH 010/775] reftable/basics: provide wrappers for big endian conversion We're using a mixture of big endian conversion functions provided by both the reftable library, but also by the Git codebase. Refactor the code so that we exclusively use reftable-provided wrappers in order to untangle us from the Git codebase. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.c | 19 -------- reftable/basics.h | 76 ++++++++++++++++++++++++++++++-- reftable/block.c | 12 ++--- reftable/reader.c | 22 ++++----- reftable/record.c | 8 ++-- reftable/writer.c | 20 ++++----- t/unit-tests/t-reftable-basics.c | 28 +++++++++--- 7 files changed, 127 insertions(+), 58 deletions(-) diff --git a/reftable/basics.c b/reftable/basics.c index 3b5ea27bbdc56e..8c4a4433e41100 100644 --- a/reftable/basics.c +++ b/reftable/basics.c @@ -147,25 +147,6 @@ char *reftable_buf_detach(struct reftable_buf *buf) return result; } -void put_be24(uint8_t *out, uint32_t i) -{ - out[0] = (uint8_t)((i >> 16) & 0xff); - out[1] = (uint8_t)((i >> 8) & 0xff); - out[2] = (uint8_t)(i & 0xff); -} - -uint32_t get_be24(uint8_t *in) -{ - return (uint32_t)(in[0]) << 16 | (uint32_t)(in[1]) << 8 | - (uint32_t)(in[2]); -} - -void put_be16(uint8_t *out, uint16_t i) -{ - out[0] = (uint8_t)((i >> 8) & 0xff); - out[1] = (uint8_t)(i & 0xff); -} - size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) { size_t lo = 0; diff --git a/reftable/basics.h b/reftable/basics.h index 646f8d67f2d4de..c1ddbaec3ff581 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -76,9 +76,79 @@ char *reftable_buf_detach(struct reftable_buf *buf); /* Bigendian en/decoding of integers */ -void put_be24(uint8_t *out, uint32_t i); -uint32_t get_be24(uint8_t *in); -void put_be16(uint8_t *out, uint16_t i); +static inline void reftable_put_be16(void *out, uint16_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 8) & 0xff); + p[1] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be24(void *out, uint32_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 16) & 0xff); + p[1] = (uint8_t)((i >> 8) & 0xff); + p[2] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be32(void *out, uint32_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 24) & 0xff); + p[1] = 
(uint8_t)((i >> 16) & 0xff); + p[2] = (uint8_t)((i >> 8) & 0xff); + p[3] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be64(void *out, uint64_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 56) & 0xff); + p[1] = (uint8_t)((i >> 48) & 0xff); + p[2] = (uint8_t)((i >> 40) & 0xff); + p[3] = (uint8_t)((i >> 32) & 0xff); + p[4] = (uint8_t)((i >> 24) & 0xff); + p[5] = (uint8_t)((i >> 16) & 0xff); + p[6] = (uint8_t)((i >> 8) & 0xff); + p[7] = (uint8_t)((i >> 0) & 0xff); +} + +static inline uint16_t reftable_get_be16(const void *in) +{ + const unsigned char *p = in; + return (uint16_t)(p[0]) << 8 | + (uint16_t)(p[1]) << 0; +} + +static inline uint32_t reftable_get_be24(const void *in) +{ + const unsigned char *p = in; + return (uint32_t)(p[0]) << 16 | + (uint32_t)(p[1]) << 8 | + (uint32_t)(p[2]) << 0; +} + +static inline uint32_t reftable_get_be32(const void *in) +{ + const unsigned char *p = in; + return (uint32_t)(p[0]) << 24 | + (uint32_t)(p[1]) << 16 | + (uint32_t)(p[2]) << 8| + (uint32_t)(p[3]) << 0; +} + +static inline uint64_t reftable_get_be64(const void *in) +{ + const unsigned char *p = in; + return (uint64_t)(p[0]) << 56 | + (uint64_t)(p[1]) << 48 | + (uint64_t)(p[2]) << 40 | + (uint64_t)(p[3]) << 32 | + (uint64_t)(p[4]) << 24 | + (uint64_t)(p[5]) << 16 | + (uint64_t)(p[6]) << 8 | + (uint64_t)(p[7]) << 0; +} /* * find smallest index i in [0, sz) at which `f(i) > 0`, assuming that f is diff --git a/reftable/block.c b/reftable/block.c index 999876826d1fd7..53b5e044690f52 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -147,13 +147,13 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) int block_writer_finish(struct block_writer *w) { for (uint32_t i = 0; i < w->restart_len; i++) { - put_be24(w->block + w->next, w->restarts[i]); + reftable_put_be24(w->block + w->next, w->restarts[i]); w->next += 3; } - put_be16(w->block + w->next, w->restart_len); + reftable_put_be16(w->block + w->next, 
w->restart_len); w->next += 2; - put_be24(w->block + 1 + w->header_off, w->next); + reftable_put_be24(w->block + 1 + w->header_off, w->next); /* * Log records are stored zlib-compressed. Note that the compression @@ -215,7 +215,7 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, { uint32_t full_block_size = table_block_size; uint8_t typ = block->data[header_off]; - uint32_t sz = get_be24(block->data + header_off + 1); + uint32_t sz = reftable_get_be24(block->data + header_off + 1); int err = 0; uint16_t restart_count = 0; uint32_t restart_start = 0; @@ -299,7 +299,7 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block, full_block_size = sz; } - restart_count = get_be16(block->data + sz - 2); + restart_count = reftable_get_be16(block->data + sz - 2); restart_start = sz - 2 - 3 * restart_count; restart_bytes = block->data + restart_start; @@ -354,7 +354,7 @@ int block_reader_first_key(const struct block_reader *br, struct reftable_buf *k static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx) { - return get_be24(br->restart_bytes + 3 * idx); + return reftable_get_be24(br->restart_bytes + 3 * idx); } void block_iter_seek_start(struct block_iter *it, const struct block_reader *br) diff --git a/reftable/reader.c b/reftable/reader.c index 36a5633eded9f3..bf07a0a586fd6f 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -101,18 +101,18 @@ static int parse_footer(struct reftable_reader *r, uint8_t *footer, } f++; - r->block_size = get_be24(f); + r->block_size = reftable_get_be24(f); f += 3; - r->min_update_index = get_be64(f); + r->min_update_index = reftable_get_be64(f); f += 8; - r->max_update_index = get_be64(f); + r->max_update_index = reftable_get_be64(f); f += 8; if (r->version == 1) { r->hash_id = REFTABLE_HASH_SHA1; } else { - switch (get_be32(f)) { + switch (reftable_get_be32(f)) { case REFTABLE_FORMAT_ID_SHA1: r->hash_id = REFTABLE_HASH_SHA1; break; @@ -127,24 +127,24 @@ 
static int parse_footer(struct reftable_reader *r, uint8_t *footer, f += 4; } - r->ref_offsets.index_offset = get_be64(f); + r->ref_offsets.index_offset = reftable_get_be64(f); f += 8; - r->obj_offsets.offset = get_be64(f); + r->obj_offsets.offset = reftable_get_be64(f); f += 8; r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1); r->obj_offsets.offset >>= 5; - r->obj_offsets.index_offset = get_be64(f); + r->obj_offsets.index_offset = reftable_get_be64(f); f += 8; - r->log_offsets.offset = get_be64(f); + r->log_offsets.offset = reftable_get_be64(f); f += 8; - r->log_offsets.index_offset = get_be64(f); + r->log_offsets.index_offset = reftable_get_be64(f); f += 8; computed_crc = crc32(0, footer, f - footer); - file_crc = get_be32(f); + file_crc = reftable_get_be32(f); f += 4; if (computed_crc != file_crc) { err = REFTABLE_FORMAT_ERROR; @@ -214,7 +214,7 @@ static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off, *typ = data[0]; if (reftable_is_block_type(*typ)) { - result = get_be24(data + 1); + result = reftable_get_be24(data + 1); } return result; } diff --git a/reftable/record.c b/reftable/record.c index b39d99fcc75a66..3552bafa9945eb 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -689,7 +689,7 @@ static int reftable_log_record_key(const void *r, struct reftable_buf *dest) return err; ts = (~ts) - rec->update_index; - put_be64(&i64[0], ts); + reftable_put_be64(&i64[0], ts); err = reftable_buf_add(dest, i64, sizeof(i64)); if (err < 0) @@ -814,7 +814,7 @@ static int reftable_log_record_encode(const void *rec, struct string_view s, if (s.len < 2) return -1; - put_be16(s.buf, r->value.update.tz_offset); + reftable_put_be16(s.buf, r->value.update.tz_offset); string_view_consume(&s, 2); n = encode_string( @@ -846,7 +846,7 @@ static int reftable_log_record_decode(void *rec, struct reftable_buf key, } memcpy(r->refname, key.buf, key.len - 8); - ts = get_be64(key.buf + key.len - 8); + ts = reftable_get_be64((unsigned char *)key.buf + 
key.len - 8); r->update_index = (~max) - ts; @@ -937,7 +937,7 @@ static int reftable_log_record_decode(void *rec, struct reftable_buf key, goto done; } - r->value.update.tz_offset = get_be16(in.buf); + r->value.update.tz_offset = reftable_get_be16(in.buf); string_view_consume(&in, 2); n = decode_string(scratch, in); diff --git a/reftable/writer.c b/reftable/writer.c index 239573ade24506..913b971b591eb4 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -99,9 +99,9 @@ static int writer_write_header(struct reftable_writer *w, uint8_t *dest) dest[4] = writer_version(w); - put_be24(dest + 5, w->opts.block_size); - put_be64(dest + 8, w->min_update_index); - put_be64(dest + 16, w->max_update_index); + reftable_put_be24(dest + 5, w->opts.block_size); + reftable_put_be64(dest + 8, w->min_update_index); + reftable_put_be64(dest + 16, w->max_update_index); if (writer_version(w) == 2) { uint32_t hash_id; @@ -116,7 +116,7 @@ static int writer_write_header(struct reftable_writer *w, uint8_t *dest) return -1; } - put_be32(dest + 24, hash_id); + reftable_put_be32(dest + 24, hash_id); } return header_size(writer_version(w)); @@ -730,19 +730,19 @@ int reftable_writer_close(struct reftable_writer *w) } p += writer_write_header(w, footer); - put_be64(p, w->stats.ref_stats.index_offset); + reftable_put_be64(p, w->stats.ref_stats.index_offset); p += 8; - put_be64(p, (w->stats.obj_stats.offset) << 5 | w->stats.object_id_len); + reftable_put_be64(p, (w->stats.obj_stats.offset) << 5 | w->stats.object_id_len); p += 8; - put_be64(p, w->stats.obj_stats.index_offset); + reftable_put_be64(p, w->stats.obj_stats.index_offset); p += 8; - put_be64(p, w->stats.log_stats.offset); + reftable_put_be64(p, w->stats.log_stats.offset); p += 8; - put_be64(p, w->stats.log_stats.index_offset); + reftable_put_be64(p, w->stats.log_stats.index_offset); p += 8; - put_be32(p, crc32(0, footer, p - footer)); + reftable_put_be32(p, crc32(0, footer, p - footer)); p += 4; err = w->flush(w->write_arg); diff 
--git a/t/unit-tests/t-reftable-basics.c b/t/unit-tests/t-reftable-basics.c index 9ba7eb05ada89b..c9e751e49e9aae 100644 --- a/t/unit-tests/t-reftable-basics.c +++ b/t/unit-tests/t-reftable-basics.c @@ -128,12 +128,30 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED) reftable_buf_release(&b); } - if_test ("put_be24 and get_be24 work") { + if_test ("reftable_put_be64 and reftable_get_be64 work") { + uint64_t in = 0x1122334455667788; + uint8_t dest[8]; + uint64_t out; + reftable_put_be64(dest, in); + out = reftable_get_be64(dest); + check_int(in, ==, out); + } + + if_test ("reftable_put_be32 and reftable_get_be32 work") { + uint32_t in = 0x11223344; + uint8_t dest[4]; + uint32_t out; + reftable_put_be32(dest, in); + out = reftable_get_be32(dest); + check_int(in, ==, out); + } + + if_test ("reftable_put_be24 and reftable_get_be24 work") { uint32_t in = 0x112233; uint8_t dest[3]; uint32_t out; - put_be24(dest, in); - out = get_be24(dest); + reftable_put_be24(dest, in); + out = reftable_get_be24(dest); check_int(in, ==, out); } @@ -141,8 +159,8 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED) uint32_t in = 0xfef1; uint8_t dest[3]; uint32_t out; - put_be16(dest, in); - out = get_be16(dest); + reftable_put_be16(dest, in); + out = reftable_get_be16(dest); check_int(in, ==, out); } From 01a587da8cf89f9d6c8c5b19ea3e109efb7c9b7c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:46 +0100 Subject: [PATCH 011/775] reftable/reader: stop using `ARRAY_SIZE()` macro We have a single user of the `ARRAY_SIZE()` macro in the reftable reader. Drop its use to reduce our dependence on the Git codebase. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reftable/reader.c b/reftable/reader.c index bf07a0a586fd6f..c3a367466529db 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -849,7 +849,7 @@ int reftable_reader_print_blocks(const char *tablename) printf("header:\n"); printf(" block_size: %d\n", r->block_size); - for (i = 0; i < ARRAY_SIZE(sections); i++) { + for (i = 0; i < sizeof(sections) / sizeof(*sections); i++) { err = table_iter_seek_start(&ti, sections[i].type, 0); if (err < 0) goto done; From 712f6cfe5411fdbbf46688c22a40fac7e0a8839b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:47 +0100 Subject: [PATCH 012/775] reftable/system: introduce `reftable_rand()` Introduce a new system-level `reftable_rand()` function that generates a single unsigned integer for us. The implementation of this function is to be provided by the calling codebase, which allows us to more easily hook into pre-seeded random number generators. Adapt the two callsites where we generated random data. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 4 ++-- reftable/system.c | 5 +++++ reftable/system.h | 6 ++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index c33f0c3333accd..08893fd454fe54 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -523,7 +523,7 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, close(fd); fd = -1; - delay = delay + (delay * git_rand(CSPRNG_BYTES_INSECURE)) / UINT32_MAX + 1; + delay = delay + (delay * reftable_rand()) / UINT32_MAX + 1; sleep_millisec(delay); } @@ -688,7 +688,7 @@ int reftable_stack_add(struct reftable_stack *st, static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max) { char buf[100]; - uint32_t rnd = git_rand(CSPRNG_BYTES_INSECURE); + uint32_t rnd = reftable_rand(); snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", min, max, rnd); reftable_buf_reset(dest); diff --git a/reftable/system.c b/reftable/system.c index adf8e4d30b823c..e25ccc0da3ccb8 100644 --- a/reftable/system.c +++ b/reftable/system.c @@ -4,6 +4,11 @@ #include "../lockfile.h" #include "../tempfile.h" +uint32_t reftable_rand(void) +{ + return git_rand(CSPRNG_BYTES_INSECURE); +} + int tmpfile_from_pattern(struct reftable_tmpfile *out, const char *pattern) { struct tempfile *tempfile; diff --git a/reftable/system.h b/reftable/system.h index d02eacea8f02dd..bb6a7e6285b840 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -14,6 +14,12 @@ license that can be found in the LICENSE file or at #include "git-compat-util.h" #include "compat/zlib-compat.h" +/* + * Return a random 32 bit integer. This function is expected to return + * pre-seeded data. + */ +uint32_t reftable_rand(void); + /* * An implementation-specific temporary file. 
By making this specific to the * implementation it becomes possible to tie temporary files into any kind of From 10f2935c7f2070368dadde582f94c9c76e2fe991 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:48 +0100 Subject: [PATCH 013/775] reftable/stack: stop using `sleep_millisec()` Refactor our use of `sleep_millisec()` by open-coding it with poll(3p), which is the current implementation of this function. Ideally, we'd use a more direct way to sleep, but there is no equivalent to sleep(3p) that would accept milliseconds as input. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reftable/stack.c b/reftable/stack.c index 08893fd454fe54..6dac015b473a69 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -524,7 +524,7 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, fd = -1; delay = delay + (delay * reftable_rand()) / UINT32_MAX + 1; - sleep_millisec(delay); + poll(NULL, 0, delay); } out: From f8ed12dec459df9ea0d43ccacc93cb2c32702c0e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:49 +0100 Subject: [PATCH 014/775] reftable/basics: stop using `SWAP()` macro Stop using the `SWAP()` macro in favor of an open-coded variant of it. Note that this also requires us to open-code the build assert that `SWAP()` itself uses to verify that the size of both variables matches. This is done to reduce our dependency on the Git codebase. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.h | 9 +++++++++ reftable/merged.c | 2 +- reftable/pq.c | 4 ++-- reftable/record.c | 16 ++++++++-------- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/reftable/basics.h b/reftable/basics.h index c1ddbaec3ff581..59000798f0be1f 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -266,6 +266,15 @@ static inline void *reftable_alloc_grow(void *p, size_t nelem, size_t elsize, # define strdup(str) REFTABLE_BANNED(strdup) #endif +#define REFTABLE_SWAP(a, b) do { \ + void *_swap_a_ptr = &(a); \ + void *_swap_b_ptr = &(b); \ + unsigned char _swap_buffer[sizeof(a) - 2 * sizeof(a) * (sizeof(a) != sizeof(b))]; \ + memcpy(_swap_buffer, _swap_a_ptr, sizeof(a)); \ + memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a)); \ + memcpy(_swap_b_ptr, _swap_buffer, sizeof(a)); \ +} while (0) + /* Find the longest shared prefix size of `a` and `b` */ size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b); diff --git a/reftable/merged.c b/reftable/merged.c index 563864068c1882..4ff1553772a037 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -155,7 +155,7 @@ static int merged_iter_next_entry(struct merged_iter *mi, } mi->advance_index = entry.index; - SWAP(*rec, *entry.rec); + REFTABLE_SWAP(*rec, *entry.rec); return 0; } diff --git a/reftable/pq.c b/reftable/pq.c index ef8035cfd9b692..82394a972db2d9 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -57,7 +57,7 @@ int merged_iter_pqueue_remove(struct merged_iter_pqueue *pq, struct pq_entry *ou if (min == i) break; - SWAP(pq->heap[i], pq->heap[min]); + REFTABLE_SWAP(pq->heap[i], pq->heap[min]); i = min; } @@ -81,7 +81,7 @@ int merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry size_t j = (i - 1) / 2; if (pq_less(&pq->heap[j], &pq->heap[i])) break; - SWAP(pq->heap[j], pq->heap[i]); + REFTABLE_SWAP(pq->heap[j], pq->heap[i]); i = j; } diff --git a/reftable/record.c b/reftable/record.c index 
3552bafa9945eb..9a1edf39a0279b 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -237,11 +237,11 @@ static int reftable_ref_record_copy_from(void *rec, const void *src_rec, size_t refname_cap = 0; int err; - SWAP(refname, ref->refname); - SWAP(refname_cap, ref->refname_cap); + REFTABLE_SWAP(refname, ref->refname); + REFTABLE_SWAP(refname_cap, ref->refname_cap); reftable_ref_record_release(ref); - SWAP(ref->refname, refname); - SWAP(ref->refname_cap, refname_cap); + REFTABLE_SWAP(ref->refname, refname); + REFTABLE_SWAP(ref->refname_cap, refname_cap); if (src->refname) { size_t refname_len = strlen(src->refname); @@ -376,11 +376,11 @@ static int reftable_ref_record_decode(void *rec, struct reftable_buf key, return n; string_view_consume(&in, n); - SWAP(refname, r->refname); - SWAP(refname_cap, r->refname_cap); + REFTABLE_SWAP(refname, r->refname); + REFTABLE_SWAP(refname_cap, r->refname_cap); reftable_ref_record_release(r); - SWAP(r->refname, refname); - SWAP(r->refname_cap, refname_cap); + REFTABLE_SWAP(r->refname, refname); + REFTABLE_SWAP(r->refname_cap, refname_cap); REFTABLE_ALLOC_GROW_OR_NULL(r->refname, key.len + 1, r->refname_cap); if (!r->refname) { From f93b2a04243ee1a95920ae02a9423accbe1c3719 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:50 +0100 Subject: [PATCH 015/775] reftable/basics: introduce `REFTABLE_UNUSED` annotation Introduce the `REFTABLE_UNUSED` annotation and replace all existing users of `UNUSED` in the reftable library to use the new macro instead. Note that we unconditionally define `MAYBE_UNUSED` in the exact same way, so doing so unconditionally for `REFTABLE_UNUSED` should be fine, too. 
Suggested-by: Toon Claes Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/basics.h | 2 ++ reftable/blocksource.c | 6 +++--- reftable/iter.c | 10 +++++----- reftable/record.c | 26 +++++++++++++------------- reftable/writer.c | 2 +- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/reftable/basics.h b/reftable/basics.h index 59000798f0be1f..fd59cbb77204d4 100644 --- a/reftable/basics.h +++ b/reftable/basics.h @@ -16,6 +16,8 @@ license that can be found in the LICENSE file or at #include "system.h" #include "reftable-basics.h" +#define REFTABLE_UNUSED __attribute__((__unused__)) + struct reftable_buf { size_t alloc; size_t len; diff --git a/reftable/blocksource.c b/reftable/blocksource.c index 02972c46f4b6c2..78c1be233733ca 100644 --- a/reftable/blocksource.c +++ b/reftable/blocksource.c @@ -13,14 +13,14 @@ license that can be found in the LICENSE file or at #include "reftable-blocksource.h" #include "reftable-error.h" -static void reftable_buf_return_block(void *b UNUSED, struct reftable_block *dest) +static void reftable_buf_return_block(void *b REFTABLE_UNUSED, struct reftable_block *dest) { if (dest->len) memset(dest->data, 0xff, dest->len); reftable_free(dest->data); } -static void reftable_buf_close(void *b UNUSED) +static void reftable_buf_close(void *b REFTABLE_UNUSED) { } @@ -67,7 +67,7 @@ static uint64_t file_size(void *b) return ((struct file_block_source *)b)->size; } -static void file_return_block(void *b UNUSED, struct reftable_block *dest UNUSED) +static void file_return_block(void *b REFTABLE_UNUSED, struct reftable_block *dest REFTABLE_UNUSED) { } diff --git a/reftable/iter.c b/reftable/iter.c index b2ffb09c16bc4d..f520382e70006f 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -25,17 +25,17 @@ int iterator_next(struct reftable_iterator *it, struct reftable_record *rec) return it->ops->next(it->iter_arg, rec); } -static int empty_iterator_seek(void *arg UNUSED, struct reftable_record *want UNUSED) +static 
int empty_iterator_seek(void *arg REFTABLE_UNUSED, struct reftable_record *want REFTABLE_UNUSED) { return 0; } -static int empty_iterator_next(void *arg UNUSED, struct reftable_record *rec UNUSED) +static int empty_iterator_next(void *arg REFTABLE_UNUSED, struct reftable_record *rec REFTABLE_UNUSED) { return 1; } -static void empty_iterator_close(void *arg UNUSED) +static void empty_iterator_close(void *arg REFTABLE_UNUSED) { } @@ -143,8 +143,8 @@ static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) return 0; } -static int indexed_table_ref_iter_seek(void *p UNUSED, - struct reftable_record *want UNUSED) +static int indexed_table_ref_iter_seek(void *p REFTABLE_UNUSED, + struct reftable_record *want REFTABLE_UNUSED) { return REFTABLE_API_ERROR; } diff --git a/reftable/record.c b/reftable/record.c index 9a1edf39a0279b..142853d5070fbe 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -490,7 +490,7 @@ static void reftable_obj_record_release(void *rec) } static int reftable_obj_record_copy_from(void *rec, const void *src_rec, - uint32_t hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { struct reftable_obj_record *obj = rec; const struct reftable_obj_record *src = src_rec; @@ -528,7 +528,7 @@ static uint8_t reftable_obj_record_val_type(const void *rec) } static int reftable_obj_record_encode(const void *rec, struct string_view s, - uint32_t hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { const struct reftable_obj_record *r = rec; struct string_view start = s; @@ -563,8 +563,8 @@ static int reftable_obj_record_encode(const void *rec, struct string_view s, static int reftable_obj_record_decode(void *rec, struct reftable_buf key, uint8_t val_type, struct string_view in, - uint32_t hash_size UNUSED, - struct reftable_buf *scratch UNUSED) + uint32_t hash_size REFTABLE_UNUSED, + struct reftable_buf *scratch REFTABLE_UNUSED) { struct string_view start = in; struct reftable_obj_record *r = rec; @@ -618,13 +618,13 @@ static int 
reftable_obj_record_decode(void *rec, struct reftable_buf key, return start.len - in.len; } -static int not_a_deletion(const void *p UNUSED) +static int not_a_deletion(const void *p REFTABLE_UNUSED) { return 0; } static int reftable_obj_record_equal_void(const void *a, const void *b, - uint32_t hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { struct reftable_obj_record *ra = (struct reftable_obj_record *) a; struct reftable_obj_record *rb = (struct reftable_obj_record *) b; @@ -1054,7 +1054,7 @@ static int reftable_index_record_key(const void *r, struct reftable_buf *dest) } static int reftable_index_record_copy_from(void *rec, const void *src_rec, - uint32_t hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { struct reftable_index_record *dst = rec; const struct reftable_index_record *src = src_rec; @@ -1075,13 +1075,13 @@ static void reftable_index_record_release(void *rec) reftable_buf_release(&idx->last_key); } -static uint8_t reftable_index_record_val_type(const void *rec UNUSED) +static uint8_t reftable_index_record_val_type(const void *rec REFTABLE_UNUSED) { return 0; } static int reftable_index_record_encode(const void *rec, struct string_view out, - uint32_t hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { const struct reftable_index_record *r = (const struct reftable_index_record *)rec; @@ -1097,10 +1097,10 @@ static int reftable_index_record_encode(const void *rec, struct string_view out, } static int reftable_index_record_decode(void *rec, struct reftable_buf key, - uint8_t val_type UNUSED, + uint8_t val_type REFTABLE_UNUSED, struct string_view in, - uint32_t hash_size UNUSED, - struct reftable_buf *scratch UNUSED) + uint32_t hash_size REFTABLE_UNUSED, + struct reftable_buf *scratch REFTABLE_UNUSED) { struct string_view start = in; struct reftable_index_record *r = rec; @@ -1120,7 +1120,7 @@ static int reftable_index_record_decode(void *rec, struct reftable_buf key, } static int reftable_index_record_equal(const void *a, const 
void *b, - uint32_t hash_size UNUSED) + uint32_t hash_size REFTABLE_UNUSED) { struct reftable_index_record *ia = (struct reftable_index_record *) a; struct reftable_index_record *ib = (struct reftable_index_record *) b; diff --git a/reftable/writer.c b/reftable/writer.c index 913b971b591eb4..ce55a1deb066fb 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -649,7 +649,7 @@ static void write_object_record(void *void_arg, void *key) done:; } -static void object_record_free(void *void_arg UNUSED, void *key) +static void object_record_free(void *void_arg REFTABLE_UNUSED, void *key) { struct obj_index_tree_node *entry = key; From 0b960a53daa1b336bcec5810f70d8af8ced86ca8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:51 +0100 Subject: [PATCH 016/775] compat/mingw: split out POSIX-related bits Split out POSIX-related bits from "compat/mingw.h" and "compat/msvc.h". This is in preparation for splitting up "git-compat-util.h" into a header that provides POSIX-compatibility and a header that provides common wrappers used by the Git project. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- compat/mingw-posix.h | 431 +++++++++++++++++++++++++++++++++++++++++++ compat/mingw.h | 426 +----------------------------------------- compat/msvc-posix.h | 33 ++++ compat/msvc.h | 30 +-- 4 files changed, 467 insertions(+), 453 deletions(-) create mode 100644 compat/mingw-posix.h create mode 100644 compat/msvc-posix.h diff --git a/compat/mingw-posix.h b/compat/mingw-posix.h new file mode 100644 index 00000000000000..8dddfa818d6d1a --- /dev/null +++ b/compat/mingw-posix.h @@ -0,0 +1,431 @@ +#ifndef COMPAT_MINGW_POSIX_H +#define COMPAT_MINGW_POSIX_H + +#ifdef __MINGW64_VERSION_MAJOR +#include +#include +typedef _sigset_t sigset_t; +#endif +#include +#include + +/* MinGW-w64 reports to have flockfile, but it does not actually have it.
*/ +#ifdef __MINGW64_VERSION_MAJOR +#undef _POSIX_THREAD_SAFE_FUNCTIONS +#endif + +/* + * things that are not available in header files + */ + +typedef int uid_t; +typedef int socklen_t; +#ifndef __MINGW64_VERSION_MAJOR +typedef int pid_t; +#define hstrerror strerror +#endif + +#define S_IFLNK 0120000 /* Symbolic link */ +#define S_ISLNK(x) (((x) & S_IFMT) == S_IFLNK) +#define S_ISSOCK(x) 0 + +#ifndef S_IRWXG +#define S_IRGRP 0 +#define S_IWGRP 0 +#define S_IXGRP 0 +#define S_IRWXG (S_IRGRP | S_IWGRP | S_IXGRP) +#endif +#ifndef S_IRWXO +#define S_IROTH 0 +#define S_IWOTH 0 +#define S_IXOTH 0 +#define S_IRWXO (S_IROTH | S_IWOTH | S_IXOTH) +#endif + +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 + +#define WIFEXITED(x) 1 +#define WIFSIGNALED(x) 0 +#define WEXITSTATUS(x) ((x) & 0xff) +#define WTERMSIG(x) SIGTERM + +#ifndef EWOULDBLOCK +#define EWOULDBLOCK EAGAIN +#endif +#ifndef ELOOP +#define ELOOP EMLINK +#endif +#define SHUT_WR SD_SEND + +#define SIGHUP 1 +#define SIGQUIT 3 +#define SIGKILL 9 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGCHLD 17 + +#define F_GETFD 1 +#define F_SETFD 2 +#define FD_CLOEXEC 0x1 + +#if !defined O_CLOEXEC && defined O_NOINHERIT +#define O_CLOEXEC O_NOINHERIT +#endif + +#ifndef EAFNOSUPPORT +#define EAFNOSUPPORT WSAEAFNOSUPPORT +#endif +#ifndef ECONNABORTED +#define ECONNABORTED WSAECONNABORTED +#endif +#ifndef ENOTSOCK +#define ENOTSOCK WSAENOTSOCK +#endif + +struct passwd { + char *pw_name; + char *pw_gecos; + char *pw_dir; +}; + +typedef void (__cdecl *sig_handler_t)(int); +struct sigaction { + sig_handler_t sa_handler; + unsigned sa_flags; +}; +#define SA_RESTART 0 + +struct itimerval { + struct timeval it_value, it_interval; +}; +#define ITIMER_REAL 0 + +struct utsname { + char sysname[16]; + char nodename[1]; + char release[16]; + char version[16]; + char machine[1]; +}; + +/* + * sanitize preprocessor namespace polluted by Windows headers defining + * macros which collide with git local versions + 
*/ +#undef HELP_COMMAND /* from winuser.h */ + +/* + * trivial stubs + */ + +static inline int readlink(const char *path UNUSED, char *buf UNUSED, size_t bufsiz UNUSED) +{ errno = ENOSYS; return -1; } +static inline int symlink(const char *oldpath UNUSED, const char *newpath UNUSED) +{ errno = ENOSYS; return -1; } +static inline int fchmod(int fildes UNUSED, mode_t mode UNUSED) +{ errno = ENOSYS; return -1; } +#ifndef __MINGW64_VERSION_MAJOR +static inline pid_t fork(void) +{ errno = ENOSYS; return -1; } +#endif +static inline unsigned int alarm(unsigned int seconds UNUSED) +{ return 0; } +static inline int fsync(int fd) +{ return _commit(fd); } +static inline void sync(void) +{} +static inline uid_t getuid(void) +{ return 1; } +static inline struct passwd *getpwnam(const char *name UNUSED) +{ return NULL; } +static inline int fcntl(int fd UNUSED, int cmd, ...) +{ + if (cmd == F_GETFD || cmd == F_SETFD) + return 0; + errno = EINVAL; + return -1; +} + +#define sigemptyset(x) (void)0 +static inline int sigaddset(sigset_t *set UNUSED, int signum UNUSED) +{ return 0; } +#define SIG_BLOCK 0 +#define SIG_UNBLOCK 0 +static inline int sigprocmask(int how UNUSED, const sigset_t *set UNUSED, sigset_t *oldset UNUSED) +{ return 0; } +static inline pid_t getppid(void) +{ return 1; } +static inline pid_t getpgid(pid_t pid) +{ return pid == 0 ? 
getpid() : pid; } +static inline pid_t tcgetpgrp(int fd UNUSED) +{ return getpid(); } + +/* + * simple adaptors + */ + +int mingw_mkdir(const char *path, int mode); +#define mkdir mingw_mkdir + +#define WNOHANG 1 +pid_t waitpid(pid_t pid, int *status, int options); + +#define kill mingw_kill +int mingw_kill(pid_t pid, int sig); + +#define locate_in_PATH mingw_locate_in_PATH +char *mingw_locate_in_PATH(const char *cmd); + +/* + * implementations of missing functions + */ + +int pipe(int filedes[2]); +unsigned int sleep (unsigned int seconds); +int mkstemp(char *template); +int gettimeofday(struct timeval *tv, void *tz); +#ifndef __MINGW64_VERSION_MAJOR +struct tm *gmtime_r(const time_t *timep, struct tm *result); +struct tm *localtime_r(const time_t *timep, struct tm *result); +#endif +int getpagesize(void); /* defined in MinGW's libgcc.a */ +struct passwd *getpwuid(uid_t uid); +int setitimer(int type, struct itimerval *in, struct itimerval *out); +int sigaction(int sig, struct sigaction *in, struct sigaction *out); +int link(const char *oldpath, const char *newpath); +int uname(struct utsname *buf); + +/* + * replacements of existing functions + */ + +int mingw_unlink(const char *pathname); +#define unlink mingw_unlink + +int mingw_rmdir(const char *path); +#define rmdir mingw_rmdir + +int mingw_open (const char *filename, int oflags, ...); +#define open mingw_open +#undef OPEN_RETURNS_EINTR + +int mingw_fgetc(FILE *stream); +#define fgetc mingw_fgetc + +FILE *mingw_fopen (const char *filename, const char *otype); +#define fopen mingw_fopen + +FILE *mingw_freopen (const char *filename, const char *otype, FILE *stream); +#define freopen mingw_freopen + +int mingw_fflush(FILE *stream); +#define fflush mingw_fflush + +ssize_t mingw_write(int fd, const void *buf, size_t len); +#define write mingw_write + +int mingw_access(const char *filename, int mode); +#undef access +#define access mingw_access + +int mingw_chdir(const char *dirname); +#define chdir mingw_chdir + 
+int mingw_chmod(const char *filename, int mode); +#define chmod mingw_chmod + +char *mingw_mktemp(char *template); +#define mktemp mingw_mktemp + +char *mingw_getcwd(char *pointer, int len); +#define getcwd mingw_getcwd + +#ifdef NO_UNSETENV +#error "NO_UNSETENV is incompatible with the Windows-specific startup code!" +#endif + +/* + * We bind *env() routines (even the mingw_ ones) to private mingw_ versions. + * These talk to the CRT using UNICODE/wchar_t, but maintain the original + * narrow-char API. + * + * Note that the MSCRT maintains both ANSI (getenv()) and UNICODE (_wgetenv()) + * routines and stores both versions of each environment variable in parallel + * (and secretly updates both when you set one or the other), but it uses CP_ACP + * to do the conversion rather than CP_UTF8. + * + * Since everything in the git code base is UTF8, we define the mingw_ routines + * to access the CRT using the UNICODE routines and manually convert them to + * UTF8. This also avoids round-trip problems. + * + * This also helps with our linkage, since "_wenviron" is publicly exported + * from the CRT. But to access "_environ" we would have to statically link + * to the CRT (/MT). + * + * We require NO_SETENV (and let gitsetenv() call our mingw_putenv). 
+ */ +#define getenv mingw_getenv +#define putenv mingw_putenv +#define unsetenv mingw_putenv +char *mingw_getenv(const char *name); +int mingw_putenv(const char *name); + +int mingw_gethostname(char *host, int namelen); +#define gethostname mingw_gethostname + +struct hostent *mingw_gethostbyname(const char *host); +#define gethostbyname mingw_gethostbyname + +int mingw_getaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, struct addrinfo **res); +#define getaddrinfo mingw_getaddrinfo + +int mingw_socket(int domain, int type, int protocol); +#define socket mingw_socket + +int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz); +#define connect mingw_connect + +int mingw_bind(int sockfd, struct sockaddr *sa, size_t sz); +#define bind mingw_bind + +int mingw_setsockopt(int sockfd, int lvl, int optname, void *optval, int optlen); +#define setsockopt mingw_setsockopt + +int mingw_shutdown(int sockfd, int how); +#define shutdown mingw_shutdown + +int mingw_listen(int sockfd, int backlog); +#define listen mingw_listen + +int mingw_accept(int sockfd, struct sockaddr *sa, socklen_t *sz); +#define accept mingw_accept + +int mingw_rename(const char*, const char*); +#define rename mingw_rename + +#if defined(USE_WIN32_MMAP) || defined(_MSC_VER) +int mingw_getpagesize(void); +#define getpagesize mingw_getpagesize +#endif + +int win32_fsync_no_flush(int fd); +#define fsync_no_flush win32_fsync_no_flush + +#define FSYNC_COMPONENTS_PLATFORM_DEFAULT (FSYNC_COMPONENTS_DEFAULT | FSYNC_COMPONENT_LOOSE_OBJECT) +#define FSYNC_METHOD_DEFAULT (FSYNC_METHOD_BATCH) + +struct rlimit { + unsigned int rlim_cur; +}; +#define RLIMIT_NOFILE 0 + +static inline int getrlimit(int resource, struct rlimit *rlp) +{ + if (resource != RLIMIT_NOFILE) { + errno = EINVAL; + return -1; + } + + rlp->rlim_cur = 2048; + return 0; +} + +/* + * Use mingw specific stat()/lstat()/fstat() implementations on Windows, + * including our own struct stat with 64 bit st_size and 
nanosecond-precision + * file times. + */ +#ifndef __MINGW64_VERSION_MAJOR +#define off_t off64_t +#define lseek _lseeki64 +#ifndef _MSC_VER +struct timespec { + time_t tv_sec; + long tv_nsec; +}; +#endif +#endif + +struct mingw_stat { + _dev_t st_dev; + _ino_t st_ino; + _mode_t st_mode; + short st_nlink; + short st_uid; + short st_gid; + _dev_t st_rdev; + off64_t st_size; + struct timespec st_atim; + struct timespec st_mtim; + struct timespec st_ctim; +}; + +#define st_atime st_atim.tv_sec +#define st_mtime st_mtim.tv_sec +#define st_ctime st_ctim.tv_sec + +#ifdef stat +#undef stat +#endif +#define stat mingw_stat +int mingw_lstat(const char *file_name, struct stat *buf); +int mingw_stat(const char *file_name, struct stat *buf); +int mingw_fstat(int fd, struct stat *buf); +#ifdef fstat +#undef fstat +#endif +#define fstat mingw_fstat +#ifdef lstat +#undef lstat +#endif +#define lstat mingw_lstat + + +int mingw_utime(const char *file_name, const struct utimbuf *times); +#define utime mingw_utime +size_t mingw_strftime(char *s, size_t max, + const char *format, const struct tm *tm); +#define strftime mingw_strftime + +pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env, + const char *dir, + int fhin, int fhout, int fherr); +int mingw_execvp(const char *cmd, char *const *argv); +#define execvp mingw_execvp +int mingw_execv(const char *cmd, char *const *argv); +#define execv mingw_execv + +static inline unsigned int git_ntohl(unsigned int x) +{ return (unsigned int)ntohl(x); } +#define ntohl git_ntohl + +sig_handler_t mingw_signal(int sig, sig_handler_t handler); +#define signal mingw_signal + +int mingw_raise(int sig); +#define raise mingw_raise + +/* + * ANSI emulation wrappers + */ + +int winansi_isatty(int fd); +#define isatty winansi_isatty + +int winansi_dup2(int oldfd, int newfd); +#define dup2 winansi_dup2 + +void winansi_init(void); +HANDLE winansi_get_osfhandle(int fd); + +#if !defined(__MINGW64_VERSION_MAJOR) && (!defined(_MSC_VER) || 
_MSC_VER < 1800) +#define PRIuMAX "I64u" +#define PRId64 "I64d" +#else +#include +#endif + +#endif /* COMPAT_MINGW_POSIX_H */ diff --git a/compat/mingw.h b/compat/mingw.h index ebfb8ba423b754..444daedfa52469 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -1,185 +1,10 @@ -#ifdef __MINGW64_VERSION_MAJOR -#include -#include -typedef _sigset_t sigset_t; -#endif -#include -#include - -/* MinGW-w64 reports to have flockfile, but it does not actually have it. */ -#ifdef __MINGW64_VERSION_MAJOR -#undef _POSIX_THREAD_SAFE_FUNCTIONS -#endif +#include "mingw-posix.h" struct config_context; int mingw_core_config(const char *var, const char *value, const struct config_context *ctx, void *cb); #define platform_core_config mingw_core_config -/* - * things that are not available in header files - */ - -typedef int uid_t; -typedef int socklen_t; -#ifndef __MINGW64_VERSION_MAJOR -typedef int pid_t; -#define hstrerror strerror -#endif - -#define S_IFLNK 0120000 /* Symbolic link */ -#define S_ISLNK(x) (((x) & S_IFMT) == S_IFLNK) -#define S_ISSOCK(x) 0 - -#ifndef S_IRWXG -#define S_IRGRP 0 -#define S_IWGRP 0 -#define S_IXGRP 0 -#define S_IRWXG (S_IRGRP | S_IWGRP | S_IXGRP) -#endif -#ifndef S_IRWXO -#define S_IROTH 0 -#define S_IWOTH 0 -#define S_IXOTH 0 -#define S_IRWXO (S_IROTH | S_IWOTH | S_IXOTH) -#endif - -#define S_ISUID 0004000 -#define S_ISGID 0002000 -#define S_ISVTX 0001000 - -#define WIFEXITED(x) 1 -#define WIFSIGNALED(x) 0 -#define WEXITSTATUS(x) ((x) & 0xff) -#define WTERMSIG(x) SIGTERM - -#ifndef EWOULDBLOCK -#define EWOULDBLOCK EAGAIN -#endif -#ifndef ELOOP -#define ELOOP EMLINK -#endif -#define SHUT_WR SD_SEND - -#define SIGHUP 1 -#define SIGQUIT 3 -#define SIGKILL 9 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGCHLD 17 - -#define F_GETFD 1 -#define F_SETFD 2 -#define FD_CLOEXEC 0x1 - -#if !defined O_CLOEXEC && defined O_NOINHERIT -#define O_CLOEXEC O_NOINHERIT -#endif - -#ifndef EAFNOSUPPORT -#define EAFNOSUPPORT WSAEAFNOSUPPORT -#endif -#ifndef ECONNABORTED 
-#define ECONNABORTED WSAECONNABORTED -#endif -#ifndef ENOTSOCK -#define ENOTSOCK WSAENOTSOCK -#endif - -struct passwd { - char *pw_name; - char *pw_gecos; - char *pw_dir; -}; - -typedef void (__cdecl *sig_handler_t)(int); -struct sigaction { - sig_handler_t sa_handler; - unsigned sa_flags; -}; -#define SA_RESTART 0 - -struct itimerval { - struct timeval it_value, it_interval; -}; -#define ITIMER_REAL 0 - -struct utsname { - char sysname[16]; - char nodename[1]; - char release[16]; - char version[16]; - char machine[1]; -}; - -/* - * sanitize preprocessor namespace polluted by Windows headers defining - * macros which collide with git local versions - */ -#undef HELP_COMMAND /* from winuser.h */ - -/* - * trivial stubs - */ - -static inline int readlink(const char *path UNUSED, char *buf UNUSED, size_t bufsiz UNUSED) -{ errno = ENOSYS; return -1; } -static inline int symlink(const char *oldpath UNUSED, const char *newpath UNUSED) -{ errno = ENOSYS; return -1; } -static inline int fchmod(int fildes UNUSED, mode_t mode UNUSED) -{ errno = ENOSYS; return -1; } -#ifndef __MINGW64_VERSION_MAJOR -static inline pid_t fork(void) -{ errno = ENOSYS; return -1; } -#endif -static inline unsigned int alarm(unsigned int seconds UNUSED) -{ return 0; } -static inline int fsync(int fd) -{ return _commit(fd); } -static inline void sync(void) -{} -static inline uid_t getuid(void) -{ return 1; } -static inline struct passwd *getpwnam(const char *name UNUSED) -{ return NULL; } -static inline int fcntl(int fd UNUSED, int cmd, ...) 
-{ - if (cmd == F_GETFD || cmd == F_SETFD) - return 0; - errno = EINVAL; - return -1; -} - -#define sigemptyset(x) (void)0 -static inline int sigaddset(sigset_t *set UNUSED, int signum UNUSED) -{ return 0; } -#define SIG_BLOCK 0 -#define SIG_UNBLOCK 0 -static inline int sigprocmask(int how UNUSED, const sigset_t *set UNUSED, sigset_t *oldset UNUSED) -{ return 0; } -static inline pid_t getppid(void) -{ return 1; } -static inline pid_t getpgid(pid_t pid) -{ return pid == 0 ? getpid() : pid; } -static inline pid_t tcgetpgrp(int fd UNUSED) -{ return getpid(); } - -/* - * simple adaptors - */ - -int mingw_mkdir(const char *path, int mode); -#define mkdir mingw_mkdir - -#define WNOHANG 1 -pid_t waitpid(pid_t pid, int *status, int options); - -#define kill mingw_kill -int mingw_kill(pid_t pid, int sig); - -#define locate_in_PATH mingw_locate_in_PATH -char *mingw_locate_in_PATH(const char *cmd); - #ifndef NO_OPENSSL #include static inline int mingw_SSL_set_fd(SSL *ssl, int fd) @@ -201,249 +26,6 @@ static inline int mingw_SSL_set_wfd(SSL *ssl, int fd) #define SSL_set_wfd mingw_SSL_set_wfd #endif -/* - * implementations of missing functions - */ - -int pipe(int filedes[2]); -unsigned int sleep (unsigned int seconds); -int mkstemp(char *template); -int gettimeofday(struct timeval *tv, void *tz); -#ifndef __MINGW64_VERSION_MAJOR -struct tm *gmtime_r(const time_t *timep, struct tm *result); -struct tm *localtime_r(const time_t *timep, struct tm *result); -#endif -int getpagesize(void); /* defined in MinGW's libgcc.a */ -struct passwd *getpwuid(uid_t uid); -int setitimer(int type, struct itimerval *in, struct itimerval *out); -int sigaction(int sig, struct sigaction *in, struct sigaction *out); -int link(const char *oldpath, const char *newpath); -int uname(struct utsname *buf); - -/* - * replacements of existing functions - */ - -int mingw_unlink(const char *pathname); -#define unlink mingw_unlink - -int mingw_rmdir(const char *path); -#define rmdir mingw_rmdir - -int 
mingw_open (const char *filename, int oflags, ...); -#define open mingw_open -#undef OPEN_RETURNS_EINTR - -int mingw_fgetc(FILE *stream); -#define fgetc mingw_fgetc - -FILE *mingw_fopen (const char *filename, const char *otype); -#define fopen mingw_fopen - -FILE *mingw_freopen (const char *filename, const char *otype, FILE *stream); -#define freopen mingw_freopen - -int mingw_fflush(FILE *stream); -#define fflush mingw_fflush - -ssize_t mingw_write(int fd, const void *buf, size_t len); -#define write mingw_write - -int mingw_access(const char *filename, int mode); -#undef access -#define access mingw_access - -int mingw_chdir(const char *dirname); -#define chdir mingw_chdir - -int mingw_chmod(const char *filename, int mode); -#define chmod mingw_chmod - -char *mingw_mktemp(char *template); -#define mktemp mingw_mktemp - -char *mingw_getcwd(char *pointer, int len); -#define getcwd mingw_getcwd - -#ifdef NO_UNSETENV -#error "NO_UNSETENV is incompatible with the Windows-specific startup code!" -#endif - -/* - * We bind *env() routines (even the mingw_ ones) to private mingw_ versions. - * These talk to the CRT using UNICODE/wchar_t, but maintain the original - * narrow-char API. - * - * Note that the MSCRT maintains both ANSI (getenv()) and UNICODE (_wgetenv()) - * routines and stores both versions of each environment variable in parallel - * (and secretly updates both when you set one or the other), but it uses CP_ACP - * to do the conversion rather than CP_UTF8. - * - * Since everything in the git code base is UTF8, we define the mingw_ routines - * to access the CRT using the UNICODE routines and manually convert them to - * UTF8. This also avoids round-trip problems. - * - * This also helps with our linkage, since "_wenviron" is publicly exported - * from the CRT. But to access "_environ" we would have to statically link - * to the CRT (/MT). - * - * We require NO_SETENV (and let gitsetenv() call our mingw_putenv). 
- */ -#define getenv mingw_getenv -#define putenv mingw_putenv -#define unsetenv mingw_putenv -char *mingw_getenv(const char *name); -int mingw_putenv(const char *name); - -int mingw_gethostname(char *host, int namelen); -#define gethostname mingw_gethostname - -struct hostent *mingw_gethostbyname(const char *host); -#define gethostbyname mingw_gethostbyname - -int mingw_getaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, struct addrinfo **res); -#define getaddrinfo mingw_getaddrinfo - -int mingw_socket(int domain, int type, int protocol); -#define socket mingw_socket - -int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz); -#define connect mingw_connect - -int mingw_bind(int sockfd, struct sockaddr *sa, size_t sz); -#define bind mingw_bind - -int mingw_setsockopt(int sockfd, int lvl, int optname, void *optval, int optlen); -#define setsockopt mingw_setsockopt - -int mingw_shutdown(int sockfd, int how); -#define shutdown mingw_shutdown - -int mingw_listen(int sockfd, int backlog); -#define listen mingw_listen - -int mingw_accept(int sockfd, struct sockaddr *sa, socklen_t *sz); -#define accept mingw_accept - -int mingw_rename(const char*, const char*); -#define rename mingw_rename - -#if defined(USE_WIN32_MMAP) || defined(_MSC_VER) -int mingw_getpagesize(void); -#define getpagesize mingw_getpagesize -#endif - -int win32_fsync_no_flush(int fd); -#define fsync_no_flush win32_fsync_no_flush - -#define FSYNC_COMPONENTS_PLATFORM_DEFAULT (FSYNC_COMPONENTS_DEFAULT | FSYNC_COMPONENT_LOOSE_OBJECT) -#define FSYNC_METHOD_DEFAULT (FSYNC_METHOD_BATCH) - -struct rlimit { - unsigned int rlim_cur; -}; -#define RLIMIT_NOFILE 0 - -static inline int getrlimit(int resource, struct rlimit *rlp) -{ - if (resource != RLIMIT_NOFILE) { - errno = EINVAL; - return -1; - } - - rlp->rlim_cur = 2048; - return 0; -} - -/* - * Use mingw specific stat()/lstat()/fstat() implementations on Windows, - * including our own struct stat with 64 bit st_size and 
nanosecond-precision - * file times. - */ -#ifndef __MINGW64_VERSION_MAJOR -#define off_t off64_t -#define lseek _lseeki64 -#ifndef _MSC_VER -struct timespec { - time_t tv_sec; - long tv_nsec; -}; -#endif -#endif - -struct mingw_stat { - _dev_t st_dev; - _ino_t st_ino; - _mode_t st_mode; - short st_nlink; - short st_uid; - short st_gid; - _dev_t st_rdev; - off64_t st_size; - struct timespec st_atim; - struct timespec st_mtim; - struct timespec st_ctim; -}; - -#define st_atime st_atim.tv_sec -#define st_mtime st_mtim.tv_sec -#define st_ctime st_ctim.tv_sec - -#ifdef stat -#undef stat -#endif -#define stat mingw_stat -int mingw_lstat(const char *file_name, struct stat *buf); -int mingw_stat(const char *file_name, struct stat *buf); -int mingw_fstat(int fd, struct stat *buf); -#ifdef fstat -#undef fstat -#endif -#define fstat mingw_fstat -#ifdef lstat -#undef lstat -#endif -#define lstat mingw_lstat - - -int mingw_utime(const char *file_name, const struct utimbuf *times); -#define utime mingw_utime -size_t mingw_strftime(char *s, size_t max, - const char *format, const struct tm *tm); -#define strftime mingw_strftime - -pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **env, - const char *dir, - int fhin, int fhout, int fherr); -int mingw_execvp(const char *cmd, char *const *argv); -#define execvp mingw_execvp -int mingw_execv(const char *cmd, char *const *argv); -#define execv mingw_execv - -static inline unsigned int git_ntohl(unsigned int x) -{ return (unsigned int)ntohl(x); } -#define ntohl git_ntohl - -sig_handler_t mingw_signal(int sig, sig_handler_t handler); -#define signal mingw_signal - -int mingw_raise(int sig); -#define raise mingw_raise - -/* - * ANSI emulation wrappers - */ - -int winansi_isatty(int fd); -#define isatty winansi_isatty - -int winansi_dup2(int oldfd, int newfd); -#define dup2 winansi_dup2 - -void winansi_init(void); -HANDLE winansi_get_osfhandle(int fd); - /* * git specific compatibility */ @@ -457,12 +39,6 @@ static inline 
void convert_slashes(char *path) #define PATH_SEP ';' char *mingw_query_user_email(void); #define query_user_email mingw_query_user_email -#if !defined(__MINGW64_VERSION_MAJOR) && (!defined(_MSC_VER) || _MSC_VER < 1800) -#define PRIuMAX "I64u" -#define PRId64 "I64d" -#else -#include -#endif /** * Verifies that the specified path is owned by the user running the diff --git a/compat/msvc-posix.h b/compat/msvc-posix.h new file mode 100644 index 00000000000000..c500b8b4aaf945 --- /dev/null +++ b/compat/msvc-posix.h @@ -0,0 +1,33 @@ +#ifndef COMPAT_MSVC_POSIX_H +#define COMPAT_MSVC_POSIX_H + +#include +#include +#include +#include + +#pragma warning(disable: 4018) /* signed/unsigned comparison */ +#pragma warning(disable: 4244) /* type conversion, possible loss of data */ +#pragma warning(disable: 4090) /* 'function' : different 'const' qualifiers (ALLOC_GROW etc.)*/ + +/* porting function */ +#define inline __inline +#define __inline__ __inline +#define __attribute__(x) +#define strcasecmp _stricmp +#define strncasecmp _strnicmp +#define ftruncate _chsize +#define strtoull _strtoui64 +#define strtoll _strtoi64 + +#undef ERROR + +#define ftello _ftelli64 + +typedef int sigset_t; +/* open for reading, writing, or both (not in fcntl.h) */ +#define O_ACCMODE (_O_RDONLY | _O_WRONLY | _O_RDWR) + +#include "mingw-posix.h" + +#endif /* COMPAT_MSVC_POSIX_H */ diff --git a/compat/msvc.h b/compat/msvc.h index 1d7a8c614565a6..2b87c0a7c702fc 100644 --- a/compat/msvc.h +++ b/compat/msvc.h @@ -1,33 +1,7 @@ #ifndef __MSVC__HEAD #define __MSVC__HEAD -#include -#include -#include -#include - -#pragma warning(disable: 4018) /* signed/unsigned comparison */ -#pragma warning(disable: 4244) /* type conversion, possible loss of data */ -#pragma warning(disable: 4090) /* 'function' : different 'const' qualifiers (ALLOC_GROW etc.)*/ - -/* porting function */ -#define inline __inline -#define __inline__ __inline -#define __attribute__(x) -#define strcasecmp _stricmp -#define strncasecmp 
_strnicmp -#define ftruncate _chsize -#define strtoull _strtoui64 -#define strtoll _strtoi64 - -#undef ERROR - -#define ftello _ftelli64 - -typedef int sigset_t; -/* open for reading, writing, or both (not in fcntl.h) */ -#define O_ACCMODE (_O_RDONLY | _O_WRONLY | _O_RDWR) - -#include "compat/mingw.h" +#include "msvc-posix.h" +#include "mingw.h" #endif From 75a044f748f2c37a2a08854acd49ed1fbdb86bb1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:52 +0100 Subject: [PATCH 017/775] git-compat-util.h: split out POSIX-emulating bits The "git-compat-util.h" header is a treasure trove of various bits and pieces used throughout the project. It basically mixes two different things into one: - Providing a POSIX-like interface even on platforms that aren't POSIX-compliant. - Providing low-level functionality that is specific to Git. This intermixing is a bit of a problem for the reftable library as we don't want to recreate the POSIX-like interface there. But neither do we want to pull in the Git-specific functionality, as it is otherwise quite easy to start depending on the Git codebase again. Split out a new header "compat/posix.h" that only contains the bits and pieces relevant for the emulation of POSIX, which we will start using in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- compat/posix.h | 541 ++++++++++++++++++++++++++++++++++++++++++++++ git-compat-util.h | 531 +-------------------------------------------- 2 files changed, 543 insertions(+), 529 deletions(-) create mode 100644 compat/posix.h diff --git a/compat/posix.h b/compat/posix.h new file mode 100644 index 00000000000000..f4c71f9427d774 --- /dev/null +++ b/compat/posix.h @@ -0,0 +1,541 @@ +#ifndef COMPAT_POSIX_H +#define COMPAT_POSIX_H + +#define _FILE_OFFSET_BITS 64 + +/* + * Derived from Linux "Features Test Macro" header + * Convenience macros to test the versions of gcc (or + * a compatible compiler). 
+ * Use them like this: + * #if GIT_GNUC_PREREQ (2,8) + * ... code requiring gcc 2.8 or later ... + * #endif + * + * This macro of course is not part of POSIX, but we need it for the UNUSED + * macro which is used by some of our POSIX compatibility wrappers. +*/ +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define GIT_GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else + #define GIT_GNUC_PREREQ(maj, min) 0 +#endif + +/* + * UNUSED marks a function parameter that is always unused. It also + * can be used to annotate a function, a variable, or a type that is + * always unused. + * + * A callback interface may dictate that a function accepts a + * parameter at that position, but the implementation of the function + * may not need to use the parameter. In such a case, mark the parameter + * with UNUSED. + * + * When a parameter may be used or unused, depending on conditional + * compilation, consider using MAYBE_UNUSED instead. + */ +#if GIT_GNUC_PREREQ(4, 5) +#define UNUSED __attribute__((unused)) \ + __attribute__((deprecated ("parameter declared as UNUSED"))) +#elif defined(__GNUC__) +#define UNUSED __attribute__((unused)) \ + __attribute__((deprecated)) +#else +#define UNUSED +#endif + +#ifdef __MINGW64__ +#define _POSIX_C_SOURCE 1 +#elif defined(__sun__) + /* + * On Solaris, when _XOPEN_EXTENDED is set, its header file + * forces the programs to be XPG4v2, defeating any _XOPEN_SOURCE + * setting to say we are XPG5 or XPG6. Also on Solaris, + * XPG6 programs must be compiled with a c99 compiler, while + * non XPG6 programs must be compiled with a pre-c99 compiler. 
+ */ +# if __STDC_VERSION__ - 0 >= 199901L +# define _XOPEN_SOURCE 600 +# else +# define _XOPEN_SOURCE 500 +# endif +#elif !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && \ + !defined(_M_UNIX) && !defined(__sgi) && !defined(__DragonFly__) && \ + !defined(__TANDEM) && !defined(__QNX__) && !defined(__MirBSD__) && \ + !defined(__CYGWIN__) +#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ +#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ +#endif +#define _ALL_SOURCE 1 +#define _GNU_SOURCE 1 +#define _BSD_SOURCE 1 +#define _DEFAULT_SOURCE 1 +#define _NETBSD_SOURCE 1 +#define _SGI_SOURCE 1 + +#if defined(WIN32) && !defined(__CYGWIN__) /* Both MinGW and MSVC */ +# if !defined(_WIN32_WINNT) +# define _WIN32_WINNT 0x0600 +# endif +#define WIN32_LEAN_AND_MEAN /* stops windows.h including winsock.h */ +#include +#ifndef NO_UNIX_SOCKETS +#include +#endif +#include +#define GIT_WINDOWS_NATIVE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_STRINGS_H +#include /* for strcasecmp() */ +#endif +#include +#include +#include +#ifdef NEEDS_SYS_PARAM_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(NO_POLL_H) +#include +#elif !defined(NO_SYS_POLL_H) +#include +#else +/* Pull the compat stuff */ +#include +#endif +#ifdef HAVE_BSD_SYSCTL +#include +#endif + +#if defined(__MINGW32__) +#include "mingw-posix.h" +#elif defined(_MSC_VER) +#include "msvc-posix.h" +#else +#include +#include +#include +#include +#include +#include +#include +#ifndef NO_SYS_SELECT_H +#include +#endif +#include +#include +#include +#include +#include +#include +#ifndef NO_INTTYPES_H +#include +#else +#include +#endif +#ifdef HAVE_ARC4RANDOM_LIBBSD +#include +#endif +#ifdef HAVE_GETRANDOM +#include +#endif +#ifdef NO_INTPTR_T +/* + * On I16LP32, ILP32 and LP64 "long" is the safe bet, however + * on 
LLP86, IL33LLP64 and P64 it needs to be "long long", + * while on IP16 and IP16L32 it is "int" (resp. "short") + * Size needs to match (or exceed) 'sizeof(void *)'. + * We can't take "long long" here as not everybody has it. + */ +typedef long intptr_t; +typedef unsigned long uintptr_t; +#endif +#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ +#include +#define _ALL_SOURCE 1 +#endif + +#ifdef MKDIR_WO_TRAILING_SLASH +#define mkdir(a,b) compat_mkdir_wo_trailing_slash((a),(b)) +int compat_mkdir_wo_trailing_slash(const char*, mode_t); +#endif + +#ifdef time +#undef time +#endif +static inline time_t git_time(time_t *tloc) +{ + struct timeval tv; + + /* + * Avoid time(NULL), which can disagree with gettimeofday(2) + * and filesystem timestamps. + */ + gettimeofday(&tv, NULL); + + if (tloc) + *tloc = tv.tv_sec; + return tv.tv_sec; +} +#define time git_time + +#ifdef NO_STRUCT_ITIMERVAL +struct itimerval { + struct timeval it_interval; + struct timeval it_value; +}; +#endif + +#ifdef NO_SETITIMER +static inline int git_setitimer(int which UNUSED, + const struct itimerval *value UNUSED, + struct itimerval *newvalue UNUSED) { + return 0; /* pretend success */ +} +#undef setitimer +#define setitimer(which,value,ovalue) git_setitimer(which,value,ovalue) +#endif + +#ifndef NO_LIBGEN_H +#include +#else +#define basename gitbasename +char *gitbasename(char *); +#define dirname gitdirname +char *gitdirname(char *); +#endif + +#ifndef NO_ICONV +#include +#endif + +/* On most systems would have given us this, but + * not on some systems (e.g. z/OS). + */ +#ifndef NI_MAXHOST +#define NI_MAXHOST 1025 +#endif + +#ifndef NI_MAXSERV +#define NI_MAXSERV 32 +#endif + +/* On most systems would have given us this, but + * not on some systems (e.g. GNU/Hurd). 
+ */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifndef NAME_MAX +#define NAME_MAX 255 +#endif + +typedef uintmax_t timestamp_t; +#define PRItime PRIuMAX +#define parse_timestamp strtoumax +#define TIME_MAX UINTMAX_MAX +#define TIME_MIN 0 + +int lstat_cache_aware_rmdir(const char *path); +#if !defined(__MINGW32__) && !defined(_MSC_VER) +#define rmdir lstat_cache_aware_rmdir +#endif + +#if defined(NO_MMAP) || defined(USE_WIN32_MMAP) + +#ifndef PROT_READ +#define PROT_READ 1 +#define PROT_WRITE 2 +#define MAP_PRIVATE 1 +#endif + +#define mmap git_mmap +#define munmap git_munmap +void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); +int git_munmap(void *start, size_t length); + +#else /* NO_MMAP || USE_WIN32_MMAP */ + +#include + +#endif /* NO_MMAP || USE_WIN32_MMAP */ + +#ifndef MAP_FAILED +#define MAP_FAILED ((void *)-1) +#endif + +#ifdef NEEDS_MODE_TRANSLATION +#undef S_IFMT +#undef S_IFREG +#undef S_IFDIR +#undef S_IFLNK +#undef S_IFBLK +#undef S_IFCHR +#undef S_IFIFO +#undef S_IFSOCK +#define S_IFMT 0170000 +#define S_IFREG 0100000 +#define S_IFDIR 0040000 +#define S_IFLNK 0120000 +#define S_IFBLK 0060000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_IFSOCK 0140000 +#ifdef stat +#undef stat +#endif +#define stat(path, buf) git_stat(path, buf) +int git_stat(const char *, struct stat *); +#ifdef fstat +#undef fstat +#endif +#define fstat(fd, buf) git_fstat(fd, buf) +int git_fstat(int, struct stat *); +#ifdef lstat +#undef lstat +#endif +#define lstat(path, buf) git_lstat(path, buf) +int git_lstat(const char *, struct stat *); +#endif + +#ifdef NO_PREAD +#define pread git_pread +ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); +#endif + +#ifdef NO_SETENV +#define setenv gitsetenv +int gitsetenv(const char *, const char *, int); +#endif + +#ifdef NO_MKDTEMP +#define mkdtemp gitmkdtemp +char *gitmkdtemp(char *); +#endif + +#ifdef NO_UNSETENV +#define unsetenv gitunsetenv +int gitunsetenv(const 
char *); +#endif + +#ifdef NO_STRCASESTR +#define strcasestr gitstrcasestr +char *gitstrcasestr(const char *haystack, const char *needle); +#endif + +#ifdef NO_STRLCPY +#define strlcpy gitstrlcpy +size_t gitstrlcpy(char *, const char *, size_t); +#endif + +#ifdef NO_STRTOUMAX +#define strtoumax gitstrtoumax +uintmax_t gitstrtoumax(const char *, char **, int); +#define strtoimax gitstrtoimax +intmax_t gitstrtoimax(const char *, char **, int); +#endif + +#ifdef NO_HSTRERROR +#define hstrerror githstrerror +const char *githstrerror(int herror); +#endif + +#ifdef NO_MEMMEM +#define memmem gitmemmem +void *gitmemmem(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen); +#endif + +#ifdef OVERRIDE_STRDUP +#ifdef strdup +#undef strdup +#endif +#define strdup gitstrdup +char *gitstrdup(const char *s); +#endif + +#ifdef NO_GETPAGESIZE +#define getpagesize() sysconf(_SC_PAGESIZE) +#endif + +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + +#ifdef FREAD_READS_DIRECTORIES +# if !defined(SUPPRESS_FOPEN_REDEFINITION) +# ifdef fopen +# undef fopen +# endif +# define fopen(a,b) git_fopen(a,b) +# endif +FILE *git_fopen(const char*, const char*); +#endif + +#ifdef SNPRINTF_RETURNS_BOGUS +#ifdef snprintf +#undef snprintf +#endif +#define snprintf git_snprintf +int git_snprintf(char *str, size_t maxsize, + const char *format, ...); +#ifdef vsnprintf +#undef vsnprintf +#endif +#define vsnprintf git_vsnprintf +int git_vsnprintf(char *str, size_t maxsize, + const char *format, va_list ap); +#endif + +#ifdef OPEN_RETURNS_EINTR +#undef open +#define open git_open_with_retry +int git_open_with_retry(const char *path, int flag, ...); +#endif + +#ifdef __GLIBC_PREREQ +#if __GLIBC_PREREQ(2, 1) +#define HAVE_STRCHRNUL +#endif +#endif + +#ifndef HAVE_STRCHRNUL +#define strchrnul gitstrchrnul +static inline char *gitstrchrnul(const char *s, int c) +{ + while (*s && *s != c) + s++; + return (char *)s; +} +#endif + +#ifdef NO_INET_PTON +int inet_pton(int af, const char 
*src, void *dst); +#endif + +#ifdef NO_INET_NTOP +const char *inet_ntop(int af, const void *src, char *dst, size_t size); +#endif + +#ifdef NO_PTHREADS +#define atexit git_atexit +int git_atexit(void (*handler)(void)); +#endif + +#ifndef HOST_NAME_MAX +#define HOST_NAME_MAX 256 +#endif + +#include "../sane-ctype.h" + +void git_stable_qsort(void *base, size_t nmemb, size_t size, + int(*compar)(const void *, const void *)); +#ifdef INTERNAL_QSORT +#define qsort git_stable_qsort +#endif + +#define QSORT(base, n, compar) sane_qsort((base), (n), sizeof(*(base)), compar) +static inline void sane_qsort(void *base, size_t nmemb, size_t size, + int(*compar)(const void *, const void *)) +{ + if (nmemb > 1) + qsort(base, nmemb, size, compar); +} + +#define STABLE_QSORT(base, n, compar) \ + git_stable_qsort((base), (n), sizeof(*(base)), compar) + +#ifndef HAVE_ISO_QSORT_S +int git_qsort_s(void *base, size_t nmemb, size_t size, + int (*compar)(const void *, const void *, void *), void *ctx); +#define qsort_s git_qsort_s +#endif + +#define QSORT_S(base, n, compar, ctx) do { \ + if (qsort_s((base), (n), sizeof(*(base)), compar, ctx)) \ + BUG("qsort_s() failed"); \ +} while (0) + +#ifdef NO_NSEC +#undef USE_NSEC +#define ST_CTIME_NSEC(st) 0 +#define ST_MTIME_NSEC(st) 0 +#else +#ifdef USE_ST_TIMESPEC +#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) +#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) +#else +#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) +#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) +#endif +#endif + +#ifndef va_copy +/* + * Since an obvious implementation of va_list would be to make it a + * pointer into the stack frame, a simple assignment will work on + * many systems. But let's try to be more portable. 
+ */ +#ifdef __va_copy +#define va_copy(dst, src) __va_copy(dst, src) +#else +#define va_copy(dst, src) ((dst) = (src)) +#endif +#endif + +#ifndef _POSIX_THREAD_SAFE_FUNCTIONS +static inline void git_flockfile(FILE *fh UNUSED) +{ + ; /* nothing */ +} +static inline void git_funlockfile(FILE *fh UNUSED) +{ + ; /* nothing */ +} +#undef flockfile +#undef funlockfile +#undef getc_unlocked +#define flockfile(fh) git_flockfile(fh) +#define funlockfile(fh) git_funlockfile(fh) +#define getc_unlocked(fh) getc(fh) +#endif + +#ifdef FILENO_IS_A_MACRO +int git_fileno(FILE *stream); +# ifndef COMPAT_CODE_FILENO +# undef fileno +# define fileno(p) git_fileno(p) +# endif +#endif + +#ifdef NEED_ACCESS_ROOT_HANDLER +int git_access(const char *path, int mode); +# ifndef COMPAT_CODE_ACCESS +# ifdef access +# undef access +# endif +# define access(path, mode) git_access(path, mode) +# endif +#endif + +#endif /* COMPAT_POSIX_H */ diff --git a/git-compat-util.h b/git-compat-util.h index e123288e8f1393..03e2ba59d3e049 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -23,26 +23,9 @@ #include #endif -struct strbuf; - - -#define _FILE_OFFSET_BITS 64 +#include "compat/posix.h" - -/* Derived from Linux "Features Test Macro" header - * Convenience macros to test the versions of gcc (or - * a compatible compiler). - * Use them like this: - * #if GIT_GNUC_PREREQ (2,8) - * ... code requiring gcc 2.8 or later ... - * #endif -*/ -#if defined(__GNUC__) && defined(__GNUC_MINOR__) -# define GIT_GNUC_PREREQ(maj, min) \ - ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) -#else - #define GIT_GNUC_PREREQ(maj, min) 0 -#endif +struct strbuf; #if defined(__GNUC__) || defined(__clang__) # define PRAGMA(pragma) _Pragma(#pragma) @@ -176,71 +159,6 @@ DISABLE_WARNING(-Wsign-compare) /* Approximation of the length of the decimal representation of this type. 
*/ #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) -#ifdef __MINGW64__ -#define _POSIX_C_SOURCE 1 -#elif defined(__sun__) - /* - * On Solaris, when _XOPEN_EXTENDED is set, its header file - * forces the programs to be XPG4v2, defeating any _XOPEN_SOURCE - * setting to say we are XPG5 or XPG6. Also on Solaris, - * XPG6 programs must be compiled with a c99 compiler, while - * non XPG6 programs must be compiled with a pre-c99 compiler. - */ -# if __STDC_VERSION__ - 0 >= 199901L -# define _XOPEN_SOURCE 600 -# else -# define _XOPEN_SOURCE 500 -# endif -#elif !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && \ - !defined(_M_UNIX) && !defined(__sgi) && !defined(__DragonFly__) && \ - !defined(__TANDEM) && !defined(__QNX__) && !defined(__MirBSD__) && \ - !defined(__CYGWIN__) -#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ -#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ -#endif -#define _ALL_SOURCE 1 -#define _GNU_SOURCE 1 -#define _BSD_SOURCE 1 -#define _DEFAULT_SOURCE 1 -#define _NETBSD_SOURCE 1 -#define _SGI_SOURCE 1 - -/* - * UNUSED marks a function parameter that is always unused. It also - * can be used to annotate a function, a variable, or a type that is - * always unused. - * - * A callback interface may dictate that a function accepts a - * parameter at that position, but the implementation of the function - * may not need to use the parameter. In such a case, mark the parameter - * with UNUSED. - * - * When a parameter may be used or unused, depending on conditional - * compilation, consider using MAYBE_UNUSED instead. 
- */ -#if GIT_GNUC_PREREQ(4, 5) -#define UNUSED __attribute__((unused)) \ - __attribute__((deprecated ("parameter declared as UNUSED"))) -#elif defined(__GNUC__) -#define UNUSED __attribute__((unused)) \ - __attribute__((deprecated)) -#else -#define UNUSED -#endif - -#if defined(WIN32) && !defined(__CYGWIN__) /* Both MinGW and MSVC */ -# if !defined(_WIN32_WINNT) -# define _WIN32_WINNT 0x0600 -# endif -#define WIN32_LEAN_AND_MEAN /* stops windows.h including winsock.h */ -#include -#ifndef NO_UNIX_SOCKETS -#include -#endif -#include -#define GIT_WINDOWS_NATIVE -#endif - #if defined(NO_UNIX_SOCKETS) || !defined(GIT_WINDOWS_NATIVE) static inline int _have_unix_sockets(void) { @@ -253,45 +171,6 @@ static inline int _have_unix_sockets(void) #define have_unix_sockets _have_unix_sockets #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_STRINGS_H -#include /* for strcasecmp() */ -#endif -#include -#include -#include -#ifdef NEEDS_SYS_PARAM_H -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if !defined(NO_POLL_H) -#include -#elif !defined(NO_SYS_POLL_H) -#include -#else -/* Pull the compat stuff */ -#include -#endif -#ifdef HAVE_BSD_SYSCTL -#include -#endif - /* Used by compat/win32/path-utils.h, and more */ static inline int is_xplatform_dir_sep(int c) { @@ -308,48 +187,6 @@ static inline int is_xplatform_dir_sep(int c) #elif defined(_MSC_VER) #include "compat/win32/path-utils.h" #include "compat/msvc.h" -#else -#include -#include -#include -#include -#include -#include -#include -#ifndef NO_SYS_SELECT_H -#include -#endif -#include -#include -#include -#include -#include -#include -#ifndef NO_INTTYPES_H -#include -#else -#include -#endif -#ifdef HAVE_ARC4RANDOM_LIBBSD -#include -#endif -#ifdef HAVE_GETRANDOM -#include -#endif -#ifdef NO_INTPTR_T -/* - * On I16LP32, ILP32 and LP64 "long" is the safe bet, however - * on LLP86, IL33LLP64 and P64 it needs 
to be "long long", - * while on IP16 and IP16L32 it is "int" (resp. "short") - * Size needs to match (or exceed) 'sizeof(void *)'. - * We can't take "long long" here as not everybody has it. - */ -typedef long intptr_t; -typedef unsigned long uintptr_t; -#endif -#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ -#include -#define _ALL_SOURCE 1 #endif /* used on Mac OS X */ @@ -370,60 +207,6 @@ static inline const char *precompose_string_if_needed(const char *in) #define probe_utf8_pathname_composition() #endif -#ifdef MKDIR_WO_TRAILING_SLASH -#define mkdir(a,b) compat_mkdir_wo_trailing_slash((a),(b)) -int compat_mkdir_wo_trailing_slash(const char*, mode_t); -#endif - -#ifdef time -#undef time -#endif -static inline time_t git_time(time_t *tloc) -{ - struct timeval tv; - - /* - * Avoid time(NULL), which can disagree with gettimeofday(2) - * and filesystem timestamps. - */ - gettimeofday(&tv, NULL); - - if (tloc) - *tloc = tv.tv_sec; - return tv.tv_sec; -} -#define time git_time - -#ifdef NO_STRUCT_ITIMERVAL -struct itimerval { - struct timeval it_interval; - struct timeval it_value; -}; -#endif - -#ifdef NO_SETITIMER -static inline int git_setitimer(int which UNUSED, - const struct itimerval *value UNUSED, - struct itimerval *newvalue UNUSED) { - return 0; /* pretend success */ -} -#undef setitimer -#define setitimer(which,value,ovalue) git_setitimer(which,value,ovalue) -#endif - -#ifndef NO_LIBGEN_H -#include -#else -#define basename gitbasename -char *gitbasename(char *); -#define dirname gitdirname -char *gitdirname(char *); -#endif - -#ifndef NO_ICONV -#include -#endif - #ifndef NO_OPENSSL #ifdef __APPLE__ #undef __AVAILABILITY_MACROS_USES_AVAILABILITY @@ -441,34 +224,6 @@ char *gitdirname(char *); # include #endif -/* On most systems would have given us this, but - * not on some systems (e.g. z/OS). 
- */ -#ifndef NI_MAXHOST -#define NI_MAXHOST 1025 -#endif - -#ifndef NI_MAXSERV -#define NI_MAXSERV 32 -#endif - -/* On most systems would have given us this, but - * not on some systems (e.g. GNU/Hurd). - */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -#ifndef NAME_MAX -#define NAME_MAX 255 -#endif - -typedef uintmax_t timestamp_t; -#define PRItime PRIuMAX -#define parse_timestamp strtoumax -#define TIME_MAX UINTMAX_MAX -#define TIME_MIN 0 - #ifndef PATH_SEP #define PATH_SEP ':' #endif @@ -492,11 +247,6 @@ static inline int noop_core_config(const char *var UNUSED, #define platform_core_config noop_core_config #endif -int lstat_cache_aware_rmdir(const char *path); -#if !defined(__MINGW32__) && !defined(_MSC_VER) -#define rmdir lstat_cache_aware_rmdir -#endif - #ifndef has_dos_drive_prefix static inline int git_has_dos_drive_prefix(const char *path UNUSED) { @@ -824,25 +574,6 @@ static inline bool strip_suffix(const char *str, const char *suffix, memcpy(_swap_b_ptr, _swap_buffer, sizeof(a)); \ } while (0) -#if defined(NO_MMAP) || defined(USE_WIN32_MMAP) - -#ifndef PROT_READ -#define PROT_READ 1 -#define PROT_WRITE 2 -#define MAP_PRIVATE 1 -#endif - -#define mmap git_mmap -#define munmap git_munmap -void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); -int git_munmap(void *start, size_t length); - -#else /* NO_MMAP || USE_WIN32_MMAP */ - -#include - -#endif /* NO_MMAP || USE_WIN32_MMAP */ - #ifdef NO_MMAP /* This value must be multiple of (pagesize * 2) */ @@ -858,177 +589,15 @@ int git_munmap(void *start, size_t length); #endif /* NO_MMAP */ -#ifndef MAP_FAILED -#define MAP_FAILED ((void *)-1) -#endif - #ifdef NO_ST_BLOCKS_IN_STRUCT_STAT #define on_disk_bytes(st) ((st).st_size) #else #define on_disk_bytes(st) ((st).st_blocks * 512) #endif -#ifdef NEEDS_MODE_TRANSLATION -#undef S_IFMT -#undef S_IFREG -#undef S_IFDIR -#undef S_IFLNK -#undef S_IFBLK -#undef S_IFCHR -#undef S_IFIFO -#undef S_IFSOCK -#define S_IFMT 0170000 
-#define S_IFREG 0100000 -#define S_IFDIR 0040000 -#define S_IFLNK 0120000 -#define S_IFBLK 0060000 -#define S_IFCHR 0020000 -#define S_IFIFO 0010000 -#define S_IFSOCK 0140000 -#ifdef stat -#undef stat -#endif -#define stat(path, buf) git_stat(path, buf) -int git_stat(const char *, struct stat *); -#ifdef fstat -#undef fstat -#endif -#define fstat(fd, buf) git_fstat(fd, buf) -int git_fstat(int, struct stat *); -#ifdef lstat -#undef lstat -#endif -#define lstat(path, buf) git_lstat(path, buf) -int git_lstat(const char *, struct stat *); -#endif - #define DEFAULT_PACKED_GIT_LIMIT \ ((1024L * 1024L) * (size_t)(sizeof(void*) >= 8 ? (32 * 1024L * 1024L) : 256)) -#ifdef NO_PREAD -#define pread git_pread -ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); -#endif - -#ifdef NO_SETENV -#define setenv gitsetenv -int gitsetenv(const char *, const char *, int); -#endif - -#ifdef NO_MKDTEMP -#define mkdtemp gitmkdtemp -char *gitmkdtemp(char *); -#endif - -#ifdef NO_UNSETENV -#define unsetenv gitunsetenv -int gitunsetenv(const char *); -#endif - -#ifdef NO_STRCASESTR -#define strcasestr gitstrcasestr -char *gitstrcasestr(const char *haystack, const char *needle); -#endif - -#ifdef NO_STRLCPY -#define strlcpy gitstrlcpy -size_t gitstrlcpy(char *, const char *, size_t); -#endif - -#ifdef NO_STRTOUMAX -#define strtoumax gitstrtoumax -uintmax_t gitstrtoumax(const char *, char **, int); -#define strtoimax gitstrtoimax -intmax_t gitstrtoimax(const char *, char **, int); -#endif - -#ifdef NO_HSTRERROR -#define hstrerror githstrerror -const char *githstrerror(int herror); -#endif - -#ifdef NO_MEMMEM -#define memmem gitmemmem -void *gitmemmem(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen); -#endif - -#ifdef OVERRIDE_STRDUP -#ifdef strdup -#undef strdup -#endif -#define strdup gitstrdup -char *gitstrdup(const char *s); -#endif - -#ifdef NO_GETPAGESIZE -#define getpagesize() sysconf(_SC_PAGESIZE) -#endif - -#ifndef O_CLOEXEC -#define 
O_CLOEXEC 0 -#endif - -#ifdef FREAD_READS_DIRECTORIES -# if !defined(SUPPRESS_FOPEN_REDEFINITION) -# ifdef fopen -# undef fopen -# endif -# define fopen(a,b) git_fopen(a,b) -# endif -FILE *git_fopen(const char*, const char*); -#endif - -#ifdef SNPRINTF_RETURNS_BOGUS -#ifdef snprintf -#undef snprintf -#endif -#define snprintf git_snprintf -int git_snprintf(char *str, size_t maxsize, - const char *format, ...); -#ifdef vsnprintf -#undef vsnprintf -#endif -#define vsnprintf git_vsnprintf -int git_vsnprintf(char *str, size_t maxsize, - const char *format, va_list ap); -#endif - -#ifdef OPEN_RETURNS_EINTR -#undef open -#define open git_open_with_retry -int git_open_with_retry(const char *path, int flag, ...); -#endif - -#ifdef __GLIBC_PREREQ -#if __GLIBC_PREREQ(2, 1) -#define HAVE_STRCHRNUL -#endif -#endif - -#ifndef HAVE_STRCHRNUL -#define strchrnul gitstrchrnul -static inline char *gitstrchrnul(const char *s, int c) -{ - while (*s && *s != c) - s++; - return (char *)s; -} -#endif - -#ifdef NO_INET_PTON -int inet_pton(int af, const char *src, void *dst); -#endif - -#ifdef NO_INET_NTOP -const char *inet_ntop(int af, const void *src, char *dst, size_t size); -#endif - -#ifdef NO_PTHREADS -#define atexit git_atexit -int git_atexit(void (*handler)(void)); -#endif - static inline size_t st_add(size_t a, size_t b) { if (unsigned_add_overflows(a, b)) @@ -1295,12 +864,6 @@ static inline size_t xsize_t(off_t len) return (size_t) len; } -#ifndef HOST_NAME_MAX -#define HOST_NAME_MAX 256 -#endif - -#include "sane-ctype.h" - /* * Like skip_prefix, but compare case-insensitively. 
Note that the comparison * is done via tolower(), so it is strictly ASCII (no multi-byte characters or @@ -1366,34 +929,6 @@ static inline int strtol_i(char const *s, int base, int *result) return 0; } -void git_stable_qsort(void *base, size_t nmemb, size_t size, - int(*compar)(const void *, const void *)); -#ifdef INTERNAL_QSORT -#define qsort git_stable_qsort -#endif - -#define QSORT(base, n, compar) sane_qsort((base), (n), sizeof(*(base)), compar) -static inline void sane_qsort(void *base, size_t nmemb, size_t size, - int(*compar)(const void *, const void *)) -{ - if (nmemb > 1) - qsort(base, nmemb, size, compar); -} - -#define STABLE_QSORT(base, n, compar) \ - git_stable_qsort((base), (n), sizeof(*(base)), compar) - -#ifndef HAVE_ISO_QSORT_S -int git_qsort_s(void *base, size_t nmemb, size_t size, - int (*compar)(const void *, const void *, void *), void *ctx); -#define qsort_s git_qsort_s -#endif - -#define QSORT_S(base, n, compar, ctx) do { \ - if (qsort_s((base), (n), sizeof(*(base)), compar, ctx)) \ - BUG("qsort_s() failed"); \ -} while (0) - #ifndef REG_STARTEND #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd" #endif @@ -1418,39 +953,12 @@ int git_regcomp(regex_t *preg, const char *pattern, int cflags); # define FORCE_DIR_SET_GID 0 #endif -#ifdef NO_NSEC -#undef USE_NSEC -#define ST_CTIME_NSEC(st) 0 -#define ST_MTIME_NSEC(st) 0 -#else -#ifdef USE_ST_TIMESPEC -#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) -#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) -#else -#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) -#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) -#endif -#endif - #ifdef UNRELIABLE_FSTAT #define fstat_is_reliable() 0 #else #define fstat_is_reliable() 1 #endif -#ifndef va_copy -/* - * Since an obvious implementation of va_list would be to make it a - * pointer into the stack frame, a simple assignment will work on - * many systems. 
But let's try to be more portable. - */ -#ifdef __va_copy -#define va_copy(dst, src) __va_copy(dst, src) -#else -#define va_copy(dst, src) ((dst) = (src)) -#endif -#endif - /* usage.c: only to be used for testing BUG() implementation (see test-tool) */ extern int BUG_exit_code; @@ -1480,41 +988,6 @@ void bug_fl(const char *file, int line, const char *fmt, ...); # define SHELL_PATH "/bin/sh" #endif -#ifndef _POSIX_THREAD_SAFE_FUNCTIONS -static inline void git_flockfile(FILE *fh UNUSED) -{ - ; /* nothing */ -} -static inline void git_funlockfile(FILE *fh UNUSED) -{ - ; /* nothing */ -} -#undef flockfile -#undef funlockfile -#undef getc_unlocked -#define flockfile(fh) git_flockfile(fh) -#define funlockfile(fh) git_funlockfile(fh) -#define getc_unlocked(fh) getc(fh) -#endif - -#ifdef FILENO_IS_A_MACRO -int git_fileno(FILE *stream); -# ifndef COMPAT_CODE_FILENO -# undef fileno -# define fileno(p) git_fileno(p) -# endif -#endif - -#ifdef NEED_ACCESS_ROOT_HANDLER -int git_access(const char *path, int mode); -# ifndef COMPAT_CODE_ACCESS -# ifdef access -# undef access -# endif -# define access(path, mode) git_access(path, mode) -# endif -#endif - /* * Our code often opens a path to an optional file, to work on its * contents when we can successfully open it. We can ignore a failure From 6af23ac66cc5050aa4aa2adb5037196536c824e3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:53 +0100 Subject: [PATCH 018/775] reftable: decouple from Git codebase by pulling in "compat/posix.h" The reftable library includes "git-compat-util.h" in order to get a POSIX-like programming environment that papers over various differences between platforms. The header also brings with it a couple of helpers specific to the Git codebase though, and over time we have started to use these helpers in the reftable library, as well. This makes it very hard to use the reftable library as a standalone library without the rest of the Git codebase, so other libraries like e.g. 
libgit2 cannot easily use it. But now that we have removed all calls to Git-specific functionality and have split out "compat/posix.h" as a separate header we can address this. Stop including "git-compat-util.h" and instead include "compat/posix.h" to finalize the decoupling of the reftable library from the rest of the Git codebase. The only bits which remain specific to Git are "system.h" and "system.c", which projects will have to provide. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/system.c | 2 ++ reftable/system.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/reftable/system.c b/reftable/system.c index e25ccc0da3ccb8..1ee268b125ddb6 100644 --- a/reftable/system.c +++ b/reftable/system.c @@ -1,3 +1,5 @@ +#include "../git-compat-util.h" + #include "system.h" #include "basics.h" #include "reftable-error.h" diff --git a/reftable/system.h b/reftable/system.h index bb6a7e6285b840..10055fbff2d6e0 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -11,7 +11,7 @@ license that can be found in the LICENSE file or at /* This header glues the reftable library to the rest of Git */ -#include "git-compat-util.h" +#include "compat/posix.h" #include "compat/zlib-compat.h" /* From 8f6a2dbe34017e616f8bb35bc668538eb11a8d33 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 18 Feb 2025 10:20:54 +0100 Subject: [PATCH 019/775] Makefile: skip reftable library for Coccinelle The reftable library does not use any of the common helpers that the Git project has. Consequently, most of the rules that we have in Coccinelle do not apply to the library at all and may even generate false positives when a pattern can be converted to use a Git helper function. Exclude reftable library sources from being checked by Coccinelle to avoid such false positives. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cd0806e1e69223..83f1df5502b5d5 100644 --- a/Makefile +++ b/Makefile @@ -955,7 +955,7 @@ FOUND_SOURCE_FILES := $(filter-out $(GENERATED_H),$(shell $(SOURCES_CMD))) FOUND_C_SOURCES = $(filter %.c,$(FOUND_SOURCE_FILES)) FOUND_H_SOURCES = $(filter %.h,$(FOUND_SOURCE_FILES)) -COCCI_SOURCES = $(filter-out $(THIRD_PARTY_SOURCES),$(FOUND_C_SOURCES)) +COCCI_SOURCES = $(filter-out $(THIRD_PARTY_SOURCES) reftable/%,$(FOUND_C_SOURCES)) LIB_H = $(FOUND_H_SOURCES) From 3262a53c123844b3b9140f4e35b86f1444555aad Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 6 Feb 2025 08:53:58 +0100 Subject: [PATCH 020/775] reftable: ignore file-in-use errors when unlink(3p) fails on Windows Unlinking a file may fail on Windows systems when the file is still held open by another process. This is incompatible with POSIX semantics and by extension with Git's assumed semantics when unlinking files, which is that files can be unlinked regardless of whether they are still open or not. To counteract this incompatibility, we have some custom error handling in the `mingw_unlink()` wrapper that first retries the deletion with some delay, and then asks the user whether we should continue to retry. While this logic might be sensible in many callsites throughout Git, it is less so when used in the reftable library. We only use unlink(3p) there to delete tables which aren't referenced anymore, and the code is very aware of the limitations on Windows. As such, all calls to unlink(3p) don't perform any error checking at all and are fine with the call failing. Instead, the library provides the `reftable_stack_clean()` function, which Git knows to execute in git-pack-refs(1) after compacting a stack. The effect of this function is that all stale tables will eventually get deleted once they aren't kept open anymore. 
So while we're fine with unlink(3p) failing, the Windows-emulation of that function will still perform several sleeps and ultimately end up asking the user: $ git pack-refs Unlink of file 'C:/temp/jgittest/jgit/.git/reftable/0x000000000002-0x000000000004-50486d0e.ref' failed. Should I try again? (y/n) n Unlink of file 'C:/temp/jgittest/jgit/.git/reftable/0x000000000002-0x000000000004-50486d0e.ref' failed. Should I try again? (y/n) n Unlink of file 'C:/temp/jgittest/jgit/.git/reftable/0x000000000002-0x000000000004-50486d0e.ref' failed. Should I try again? (y/n) n It even asks multiple times, which is doubly annoying and puzzling to the user: 1. It asks when trying to delete the old file after having written the compacted stack. 2. It asks when reloading the stack, where it will try to unlink now-unreferenced tables. 3. It asks when calling `reftable_stack_clean()`, where it will try to unlink now-stale tables. Fix the issue by making it possible to disable this behaviour with a preprocessor define. As "git-compat-util.h" is only included from "system.h", and given that "system.h" is only ever included by headers and code that are internal to the reftable library, we can set that macro in this header without impacting anything else but the reftable library. 
Reported-by: Christian Reich Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- compat/mingw-posix.h | 8 ++++++-- compat/mingw.c | 5 ++++- reftable/system.h | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/compat/mingw-posix.h b/compat/mingw-posix.h index 8dddfa818d6d1a..88e0cf92924bd9 100644 --- a/compat/mingw-posix.h +++ b/compat/mingw-posix.h @@ -201,8 +201,12 @@ int uname(struct utsname *buf); * replacements of existing functions */ -int mingw_unlink(const char *pathname); -#define unlink mingw_unlink +int mingw_unlink(const char *pathname, int handle_in_use_error); +#ifdef MINGW_DONT_HANDLE_IN_USE_ERROR +# define unlink(path) mingw_unlink(path, 0) +#else +# define unlink(path) mingw_unlink(path, 1) +#endif int mingw_rmdir(const char *path); #define rmdir mingw_rmdir diff --git a/compat/mingw.c b/compat/mingw.c index 1d5b211b548dab..0e4b6a70a49d5e 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -302,7 +302,7 @@ static wchar_t *normalize_ntpath(wchar_t *wbuf) return wbuf; } -int mingw_unlink(const char *pathname) +int mingw_unlink(const char *pathname, int handle_in_use_error) { int ret, tries = 0; wchar_t wpathname[MAX_PATH]; @@ -317,6 +317,9 @@ int mingw_unlink(const char *pathname) while ((ret = _wunlink(wpathname)) == -1 && tries < ARRAY_SIZE(delay)) { if (!is_file_in_use_error(GetLastError())) break; + if (!handle_in_use_error) + return ret; + /* * We assume that some other process had the source or * destination file open at the wrong moment and retry. 
diff --git a/reftable/system.h b/reftable/system.h index 10055fbff2d6e0..072d9daea0f4fa 100644 --- a/reftable/system.h +++ b/reftable/system.h @@ -11,6 +11,7 @@ license that can be found in the LICENSE file or at /* This header glues the reftable library to the rest of Git */ +#define MINGW_DONT_HANDLE_IN_USE_ERROR #include "compat/posix.h" #include "compat/zlib-compat.h" From 8216cf9419a3a8dfcd65b4caad72c4e6cb9c0513 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:28:24 -0500 Subject: [PATCH 021/775] loose_object_info(): BUG() on inflating content with unknown type After unpack_loose_header() returns, it will have inflated not only the object header, but possibly some bytes of the object content. When we call unpack_loose_rest() to extract the actual content, it finds those extra bytes by skipping past the header's terminating NUL in the buffer. Like this: int bytes = strlen(buffer) + 1; n = stream->total_out - bytes; ... memcpy(buf, (char *) buffer + bytes, n); This won't work with the OBJECT_INFO_ALLOW_UNKNOWN_TYPE flag, as there we allow a header of arbitrary size. We put it into a strbuf, but feed only the final 32-byte chunk we read to unpack_loose_rest(). In that case stream->total_out may be unexpectedly large, and thus our "n" will be large, causing an out-of-bounds read (we do check it against our allocated buffer size, which prevents an out-of-bounds write). Probably this could be made to work by feeding the strbuf to unpack_loose_rest(), along with adjusting some types (e.g., "bytes" would need to be a size_t, since it is no longer operating on a 32-byte buffer). But I don't think it's possible to actually trigger this in practice. The only caller who passes ALLOW_UNKNOWN_TYPE is cat-file, which only allows it with the "-t" and "-s" options (neither of which access the content). 
There is one way you can _almost_ trigger it: the oid compat routines (i.e., accessing sha1 via sha256 names and vice versa) will convert objects on the fly (which requires access to the content) using the same flags that were passed in. So in theory this: t='some very large type field that causes an extra inflate call' sha1_oid=$(git hash-object -w -t "$t" file) sha256_oid=$(git rev-parse --output-object-format=sha256 $sha1_oid) git cat-file --allow-unknown-type -s $sha256_oid would try to access the content. But it doesn't work, because using compat objects requires an entry in the .git/objects/loose-object-idx file, and we don't generate such an entry for non-standard types (see the "compat" section of write_object_file_literally()). If we use "t=blob" instead, then it does access the compat object, but it doesn't trigger the problem (because "blob" is a standard short type name, and it fits in the initial 32-byte buffer). So given that this is almost a memory error bug, I think it's worth addressing. But because we can't actually trigger the situation, I'm hesitant to try a fix that we can't run. Instead let's document the restriction and protect ourselves from the out-of-bounds read by adding a BUG() check. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/object-file.c b/object-file.c index 619f039ebc7ceb..f36846b5baadcf 100644 --- a/object-file.c +++ b/object-file.c @@ -1491,6 +1491,8 @@ static int loose_object_info(struct repository *r, if (!oi->contentp) break; + if (hdrbuf.len) + BUG("unpacking content with unknown types not yet supported"); *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); if (*oi->contentp) goto cleanup; From 03e7c454e9bc15f4fa046bd3a5f6147bbd0480e6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:29:00 -0500 Subject: [PATCH 022/775] unpack_loose_header(): simplify next_out assignment When using OBJECT_INFO_ALLOW_UNKNOWN_TYPE to unpack a header that doesn't fit into our initial 32-byte buffer, we loop over calls to git_inflate(), feeding it our buffer via the "next_out" pointer each time. As the code is written, we reset next_out after each inflate call (and after reading the output), ready for the next loop. This isn't wrong, but there are a few advantages to setting up "next_out" right before each inflate call, rather than after: 1. It drops a few duplicated lines of code. 2. It makes it obvious that we always feed a fresh buffer on each call (and thus can never see Z_BUF_ERROR due to a lack of output space). 3. After we exit the loop, we'll leave stream->next_out pointing to the end of the fetched data (this is how zlib callers find out how much data is in the buffer). This doesn't matter in practice, since nobody looks at it again. But it's probably the least-surprising thing to do, as it matches how next_out is left when the whole thing fits in the initial 32-byte buffer (and we don't enter the loop at all). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/object-file.c b/object-file.c index f36846b5baadcf..e48da375bd65d3 100644 --- a/object-file.c +++ b/object-file.c @@ -1296,18 +1296,17 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, * reading the stream. */ strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); - stream->next_out = buffer; - stream->avail_out = bufsiz; do { + stream->next_out = buffer; + stream->avail_out = bufsiz; + obj_read_unlock(); status = git_inflate(stream, 0); obj_read_lock(); strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) return 0; - stream->next_out = buffer; - stream->avail_out = bufsiz; } while (status != Z_STREAM_END); return ULHR_TOO_LONG; } From e7ac344d7018d4537eda29d5a09c047a35f27364 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:29:40 -0500 Subject: [PATCH 023/775] unpack_loose_header(): report headers without NUL as "bad" If a caller asks us to read the whole loose object header value into a strbuf (e.g., via "cat-file --allow-unknown-type"), we'll keep reading until we see a NUL byte marking the end of the header. If we hit Z_STREAM_END before seeing the NUL, we obviously have to stop. But we return ULHR_TOO_LONG, which doesn't make any sense. The "too long" return code is used in the normal, 32-byte limited mode to indicate that we stopped looking. There is no such thing as "too long" here, as we'd keep reading forever until we see the end of stream or the NUL. Instead, we should return ULHR_BAD. The loose object has no NUL marking the end of header, so it is malformed. The behavior difference is slight; in either case we'd consider the object unreadable and refuse to go further. The only difference is the specific error message we produce. 
There's no test case here, as we'd need to generate a valid zlib stream without a NUL. That's not something Git will do without writing new custom code. And in the next patch we'll fix another bug in this area which will make this easier to do (and we will test it then). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index e48da375bd65d3..b1c33dbb63691b 100644 --- a/object-file.c +++ b/object-file.c @@ -1308,7 +1308,7 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) return 0; } while (status != Z_STREAM_END); - return ULHR_TOO_LONG; + return ULHR_BAD; } static void *unpack_loose_rest(git_zstream *stream, From b748ddb7a470b952b8a5596649f7433278d7f2c4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:29:58 -0500 Subject: [PATCH 024/775] unpack_loose_header(): fix infinite loop on broken zlib input When reading a loose object, we first try to expand the first 32 bytes to read the type+size header. This is enough for any of the normal Git types. But since 46f034483e (sha1_file: support reading from a loose object of unknown type, 2015-05-03), the caller can also ask us to parse any unknown names, which can be much longer. In this case we keep inflating until we find the NUL at the end of the header, or hit Z_STREAM_END. But what if zlib can't make forward progress? For example, if the loose object file is truncated, we'll have no more data to feed it. It will return Z_BUF_ERROR, and we'll just loop infinitely, calling git_inflate() over and over but never seeing new bytes nor an end-of-stream marker. We can fix this by only looping when we think we can make forward progress. This will always be Z_OK in this case. 
In other code we might also be able to continue on Z_BUF_ERROR, but: - We will never see Z_BUF_ERROR because the output buffer is full; we always feed a fresh 32-byte buffer on each call to git_inflate(). - We may see Z_BUF_ERROR if we run out of input. But since we've fed the whole mmap'd buffer to zlib, if it runs out of input there is nothing more we can do. So if we don't see Z_OK (and didn't see the end-of-header NUL, otherwise we'd have broken out of the loop), then we should stop looping and return an error. The test case shows an example where the input is truncated (which gives us the input Z_BUF_ERROR case above). Although we do operate on objects we might get from an untrusted remote, I don't think the security implications of this bug are too great. It can only trigger if both of these are true: - You're reading a loose object whose on-disk representation was written by an attacker. So fetching an object (or receiving a push) are mostly OK, because even with unpack-objects it is our local, trusted code that writes out the object file. The exception may be fetching from an untrusted local repo, or using dumb-http, which copies objects verbatim. But... - The only code path which triggers the inflate loop is cat-file's --allow-unknown-type option. This is unlikely to be called at all outside of debugging. But I also suspect that objects with non-standard types (or that are truncated) would not survive the usual fetch/receive checks in the first place. So I think it would be quite hard to trick somebody into running the infinite loop, and we can just fix the bug. 
Co-authored-by: Taylor Blau Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 2 +- t/t1006-cat-file.sh | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index b1c33dbb63691b..5086633e21289e 100644 --- a/object-file.c +++ b/object-file.c @@ -1307,7 +1307,7 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) return 0; - } while (status != Z_STREAM_END); + } while (status == Z_OK); return ULHR_BAD; } diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index e0c6482797e120..78fd970c8a18d5 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -817,6 +817,25 @@ test_expect_success 'cat-file -t and -s on corrupt loose object' ' ) ' +test_expect_success 'truncated object with --allow-unknown-type' - <<\EOT + objtype='a really long type name that exceeds the 32-byte limit' && + blob=$(git hash-object -w --literally -t "$objtype" /dev/null) && + objpath=.git/objects/$(test_oid_to_path "$blob") && + + # We want to truncate the object far enough in that we don't hit the + # end while inflating the first 32 bytes (since we want to have to dig + # for the trailing NUL of the header). But we don't want to go too far, + # since our header isn't very big. And of course we are counting + # deflated zlib bytes in the on-disk file, so it's a bit of a guess. + # Empirically 50 seems to work. 
+ mv "$objpath" obj.bak && + test_when_finished 'mv obj.bak "$objpath"' && + test_copy_bytes 50 "$objpath" && + + test_must_fail git cat-file --allow-unknown-type -t $blob 2>err && + test_grep "unable to unpack $blob header" err +EOT + # Tests for git cat-file --follow-symlinks test_expect_success 'prep for symlink tests' ' echo_without_newline "$hello_content" >morx && From 0b1493c2d49222ce07b73016bb156fecb5999bb9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:30:26 -0500 Subject: [PATCH 025/775] git_inflate(): skip zlib_post_call() sanity check on Z_NEED_DICT This fixes a case where malformed object input can cause us to hit a BUG() call in the git-zlib.c code. The zlib format allows the use of preset dictionaries to reduce the size of deflated data. The checksum of the dictionary is computed by the deflate code and goes into the stream. On the inflating side, zlib sees the dictionary checksum and returns Z_NEED_DICT, asking the caller to provide the dictionary data via inflateSetDictionary(). This should never happen in Git, because we never provide a dictionary for deflating (and if we get a stream that mentions a dictionary, we have no idea how to provide it). So normally Z_NEED_DICT is a hard error for us. But something interesting happens if we _do_ happen to see it (e.g., because of a corrupt or malicious input). In git_inflate() as we loop over calls to zlib's inflate(), we translate between our large-integer git_zstream values and zlib's native z_stream types, copying in and out with zlib_pre_call() and zlib_post_call(). In zlib_post_call() we have a few sanity checks, including one that checks that the number of bytes consumed by zlib (as measured by it moving the "next_in" pointer) is equal to the movement of its "total_in" count. But these do not correspond when we see Z_NEED_DICT! Zlib consumes the bytes from the input buffer but it does not increment total_in. And so we hit the BUG("total_in mismatch") call. 
There are a few options here: - We could ditch that BUG() check. It is making too many assumptions about how zlib updates these values. But it does have value in most cases as a sanity check on the values we're copying. - We could skip the zlib_post_call() entirely when we see Z_NEED_DICT. We know that it's a hard error for us, so we should just send the status up the stack and let the caller bail. The downside is that if we ever did want to support dictionaries, we couldn't (the git_zstream will be out of sync, since we never copied its values back from the z_stream). - We could continue to call zlib_post_call(), but skip just that BUG() check if the status is Z_NEED_DICT. This keeps git_inflate() as a thin wrapper around inflate(), and would let us later support dictionaries for some calls if we wanted to. This patch uses the third approach. It seems like the least-surprising thing to keep git_inflate() as close to inflate() as possible. And while it makes the diff a bit larger (since we have to pass the status down to the zlib_post_call() function), it's a static local function, and every caller by definition will have just made a zlib call (and so will have a status integer). 
Co-authored-by: Taylor Blau Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-zlib.c | 27 ++++++++++++++++----------- t/t1006-cat-file.sh | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/git-zlib.c b/git-zlib.c index d43bbeb6daa4c1..c2d683528bd011 100644 --- a/git-zlib.c +++ b/git-zlib.c @@ -45,7 +45,7 @@ static void zlib_pre_call(git_zstream *s) s->z.avail_out = zlib_buf_cap(s->avail_out); } -static void zlib_post_call(git_zstream *s) +static void zlib_post_call(git_zstream *s, int status) { unsigned long bytes_consumed; unsigned long bytes_produced; @@ -54,7 +54,12 @@ static void zlib_post_call(git_zstream *s) bytes_produced = s->z.next_out - s->next_out; if (s->z.total_out != s->total_out + bytes_produced) BUG("total_out mismatch"); - if (s->z.total_in != s->total_in + bytes_consumed) + /* + * zlib does not update total_in when it returns Z_NEED_DICT, + * causing a mismatch here. Skip the sanity check in that case. + */ + if (status != Z_NEED_DICT && + s->z.total_in != s->total_in + bytes_consumed) BUG("total_in mismatch"); s->total_out = s->z.total_out; @@ -71,7 +76,7 @@ void git_inflate_init(git_zstream *strm) zlib_pre_call(strm); status = inflateInit(&strm->z); - zlib_post_call(strm); + zlib_post_call(strm, status); if (status == Z_OK) return; die("inflateInit: %s (%s)", zerr_to_string(status), @@ -89,7 +94,7 @@ void git_inflate_init_gzip_only(git_zstream *strm) zlib_pre_call(strm); status = inflateInit2(&strm->z, windowBits); - zlib_post_call(strm); + zlib_post_call(strm, status); if (status == Z_OK) return; die("inflateInit2: %s (%s)", zerr_to_string(status), @@ -102,7 +107,7 @@ void git_inflate_end(git_zstream *strm) zlib_pre_call(strm); status = inflateEnd(&strm->z); - zlib_post_call(strm); + zlib_post_call(strm, status); if (status == Z_OK) return; error("inflateEnd: %s (%s)", zerr_to_string(status), @@ -121,7 +126,7 @@ int git_inflate(git_zstream *strm, int flush) ? 
0 : flush); if (status == Z_MEM_ERROR) die("inflate: out of memory"); - zlib_post_call(strm); + zlib_post_call(strm, status); /* * Let zlib work another round, while we can still @@ -163,7 +168,7 @@ void git_deflate_init(git_zstream *strm, int level) memset(strm, 0, sizeof(*strm)); zlib_pre_call(strm); status = deflateInit(&strm->z, level); - zlib_post_call(strm); + zlib_post_call(strm, status); if (status == Z_OK) return; die("deflateInit: %s (%s)", zerr_to_string(status), @@ -179,7 +184,7 @@ static void do_git_deflate_init(git_zstream *strm, int level, int windowBits) status = deflateInit2(&strm->z, level, Z_DEFLATED, windowBits, 8, Z_DEFAULT_STRATEGY); - zlib_post_call(strm); + zlib_post_call(strm, status); if (status == Z_OK) return; die("deflateInit2: %s (%s)", zerr_to_string(status), @@ -210,7 +215,7 @@ int git_deflate_abort(git_zstream *strm) zlib_pre_call(strm); status = deflateEnd(&strm->z); - zlib_post_call(strm); + zlib_post_call(strm, status); return status; } @@ -230,7 +235,7 @@ int git_deflate_end_gently(git_zstream *strm) zlib_pre_call(strm); status = deflateEnd(&strm->z); - zlib_post_call(strm); + zlib_post_call(strm, status); return status; } @@ -247,7 +252,7 @@ int git_deflate(git_zstream *strm, int flush) ? 
0 : flush); if (status == Z_MEM_ERROR) die("deflate: out of memory"); - zlib_post_call(strm); + zlib_post_call(strm, status); /* * Let zlib work another round, while we can still diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 78fd970c8a18d5..04099f7b4a3a04 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -836,6 +836,38 @@ test_expect_success 'truncated object with --allow-unknown-type' - <<\EOT test_grep "unable to unpack $blob header" err EOT +test_expect_success 'object reading handles zlib dictionary' - <<\EOT + echo 'content that will be recompressed' >file && + blob=$(git hash-object -w file) && + objpath=.git/objects/$(test_oid_to_path "$blob") && + + # Recompress a loose object using a precomputed zlib dictionary. + # This was originally done with: + # + # perl -MCompress::Raw::Zlib -e ' + # binmode STDIN; + # binmode STDOUT; + # my $data = do { local $/; }; + # my $in = new Compress::Raw::Zlib::Inflate; + # my $de = new Compress::Raw::Zlib::Deflate( + # -Dictionary => "anything" + # ); + # $in->inflate($data, $raw); + # $de->deflate($raw, $out); + # print $out; + # ' $objpath + # + # but we do not want to require the perl module for all test runs (nor + # carry a custom t/helper program that uses zlib features we don't + # otherwise care about). + mv "$objpath" obj.bak && + test_when_finished 'mv obj.bak "$objpath"' && + printf '\170\273\017\112\003\143' >$objpath && + + test_must_fail git cat-file blob $blob 2>err && + test_grep 'error: inflate: needs dictionary' err +EOT + # Tests for git cat-file --follow-symlinks test_expect_success 'prep for symlink tests' ' echo_without_newline "$hello_content" >morx && From 67a6b1aeb82fd4685e862a3a7807d4ed8ea5d899 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:30:56 -0500 Subject: [PATCH 026/775] unpack_loose_header(): avoid numeric comparison of zlib status When unpacking a loose header, we try to inflate the first 32 bytes. 
We'd expect either Z_OK (we filled up the output buffer, but there are more bytes in the object) or Z_STREAM_END (this is a tiny object whose header and content fit in the buffer). We check for that with "if (status < Z_OK)", making the assumption that all of the errors we'd see have negative values (as Z_OK itself is "0", and Z_STREAM_END is "1"). But there's at least one case this misses: Z_NEED_DICT is "2". This isn't something we'd ever expect to see, but if we do see it, we should consider it an error (since we have no dictionary to load). Instead, the current code interprets Z_NEED_DICT as success and looks for the object header's terminating NUL in the bytes we've read. This will generally be zero bytes if the dictionary is mentioned at the start of the stream. So we'll fail to find it and complain "the header is too long" (ULHR_TOO_LONG). But really, the problem is that the object is malformed, and we should return ULHR_BAD. This is a minor bug, as we consider both cases to be an error. But it does mean we print the wrong error message. The test case added in the previous patch triggers this code, so we can just confirm the error message we see here. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 2 +- t/t1006-cat-file.sh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 5086633e21289e..0bc62b53d3c25a 100644 --- a/object-file.c +++ b/object-file.c @@ -1273,7 +1273,7 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, obj_read_unlock(); status = git_inflate(stream, 0); obj_read_lock(); - if (status < Z_OK) + if (status != Z_OK && status != Z_STREAM_END) return ULHR_BAD; /* diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 04099f7b4a3a04..609dabd5cf3e18 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -865,6 +865,8 @@ test_expect_success 'object reading handles zlib dictionary' - <<\EOT printf '\170\273\017\112\003\143' >$objpath && test_must_fail git cat-file blob $blob 2>err && + test_grep ! 'too long' err && + test_grep 'error: unable to unpack' err && test_grep 'error: inflate: needs dictionary' err EOT From 9929a6791703c96e5f613cc3b52f4f9e16baa49c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:31:15 -0500 Subject: [PATCH 027/775] unpack_loose_rest(): avoid numeric comparison of zlib status When unpacking the actual content of a loose object file, we insist both that the status code we got is Z_STREAM_END, and that we consumed all bytes. If we didn't, we'll return an error, but the specific error message we produce depends on which of the two error conditions we saw. So we'll check both a second time to decide which error to produce. But this second time, our status code check is loose: it checks for a negative status value. This can get confused by zlib codes which are not negative, such as Z_NEED_DICT. In this case we'd erroneously print nothing at all, when we should say "corrupt loose object". Instead, this second check should check explicitly against Z_STREAM_END. Note that Z_OK is "0", so the existing code also produced no message for Z_OK. 
But it's impossible to see that status, since we only break out of the inflate loop when we stop seeing Z_OK (so a stream which has more bytes than its object header claims would eventually yield Z_BUF_ERROR). There's no test here, as it would require a loose object whose zlib stream returns Z_NEED_DICT in the middle of the object content. I think that is probably possible, but even our Z_NEED_DICT test in t1006 does not trigger this, because we hit that error while reading the header. I found this bug while reviewing all callers of git_inflate() for bugs similar to the one we saw in unpack_loose_header(). This was the only other case that did a numeric comparison rather than explicitly checking for Z_STREAM_END. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 0bc62b53d3c25a..17d54c845d835d 100644 --- a/object-file.c +++ b/object-file.c @@ -1352,7 +1352,7 @@ static void *unpack_loose_rest(git_zstream *stream, return buf; } - if (status < 0) + if (status != Z_STREAM_END) error(_("corrupt loose object '%s'"), oid_to_hex(oid)); else if (stream->avail_in) error(_("garbage at end of loose object '%s'"), From 84b5c1a099e6df35f4b54d651b425a894513e62b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:33:12 -0500 Subject: [PATCH 028/775] unpack_loose_rest(): never clean up zstream The unpack_loose_rest() function has funny ownership semantics: we pass in a z_stream opened by the caller, but then only _sometimes_ close it. This oddity has developed over time. When the function was originally split out in 5180cacc20 (Split up unpack_sha1_file() some more, 2005-06-02), it always called inflateEnd() to clean up the stream (though nowadays it is a git_zstream and we call git_inflate_end()). But in 7efbff7531 (unpack_sha1_file(): detect corrupt loose object files., 2007-03-05) we added error code paths which don't close the stream. 
This makes some sense, as we'd still look at parts of the stream struct to decide which error to show (though I am not sure in practice if inflateEnd() even touches those fields). This subtlety makes it hard to know when the caller has to clean up the stream and when it does not. That led to the leak fixed by aa9ef614dc (object-file: fix memory leak when reading corrupted headers, 2024-08-14). Let's instead always leave the stream intact, forcing the caller to clean it up. You might think that would create more work for the callers, but it actually ends up simplifying them, since they can put the call to git_inflate_end() in the common cleanup code path. Two things to note, though: - The check_stream_oid() function is used as a replacement for unpack_loose_rest() in read_loose_object() to read blobs. It inherited the same funny semantics, and we should fix it here, too (to keep the cleanup in read_loose_object() consistent). - In read_loose_object() we need a second "out" label, as we can jump to the existing label before opening the stream at all (and since the struct is opaque, there is no way to tell if it was initialized or not, so we must not call git_inflate_end() in that case). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/object-file.c b/object-file.c index 17d54c845d835d..f9713e4e8bacf1 100644 --- a/object-file.c +++ b/object-file.c @@ -1348,7 +1348,6 @@ static void *unpack_loose_rest(git_zstream *stream, } } if (status == Z_STREAM_END && !stream->avail_in) { - git_inflate_end(stream); return buf; } @@ -1512,8 +1511,8 @@ static int loose_object_info(struct repository *r, die(_("loose object %s (stored in %s) is corrupt"), oid_to_hex(oid), path); - git_inflate_end(&stream); cleanup: + git_inflate_end(&stream); munmap(map, mapsize); if (oi->sizep == &size_scratch) oi->sizep = NULL; @@ -2735,7 +2734,6 @@ static int check_stream_oid(git_zstream *stream, the_hash_algo->update_fn(&c, buf, stream->next_out - buf); total_read += stream->next_out - buf; } - git_inflate_end(stream); if (status != Z_STREAM_END) { error(_("corrupt loose object '%s'"), oid_to_hex(expected_oid)); @@ -2782,34 +2780,34 @@ int read_loose_object(const char *path, if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr), NULL) != ULHR_OK) { error(_("unable to unpack header of %s"), path); - goto out; + goto out_inflate; } if (parse_loose_header(hdr, oi) < 0) { error(_("unable to parse header of %s"), path); - git_inflate_end(&stream); - goto out; + goto out_inflate; } if (*oi->typep == OBJ_BLOB && *size > big_file_threshold) { if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0) - goto out; + goto out_inflate; } else { *contents = unpack_loose_rest(&stream, hdr, *size, expected_oid); if (!*contents) { error(_("unable to unpack contents of %s"), path); - git_inflate_end(&stream); - goto out; + goto out_inflate; } hash_object_file_literally(the_repository->hash_algo, *contents, *size, oi->type_name->buf, real_oid); if (!oideq(expected_oid, real_oid)) - goto out; + goto out_inflate; } ret = 0; /* everything checks out */ +out_inflate: + 
git_inflate_end(&stream); out: if (map) munmap(map, mapsize); From 547f719d9b022e87eb8cf3cb7a7632822b996e29 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:33:51 -0500 Subject: [PATCH 029/775] unpack_loose_rest(): simplify error handling Inflating a loose object is considered successful only if we got Z_STREAM_END and there were no more bytes. We check both of those conditions and return success, but then have to check them a second time to decide which error message to produce. I.e., we do something like this: if (!error_1 && !error_2) ...return success... if (error_1) ...handle error1... else if (error_2) ...handle error2... ...common error handling... This repetition was the source of a small bug fixed in an earlier commit (our Z_STREAM_END check was not the same in the two conditionals). Instead we can chain them all into a single if/else cascade, which avoids repeating ourselves: if (error_1) ...handle error1... else if (error_2) ...handle error2.... else ...return success... ...common error handling... Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/object-file.c b/object-file.c index f9713e4e8bacf1..9f6e8504fb8044 100644 --- a/object-file.c +++ b/object-file.c @@ -1347,15 +1347,15 @@ static void *unpack_loose_rest(git_zstream *stream, obj_read_lock(); } } - if (status == Z_STREAM_END && !stream->avail_in) { - return buf; - } if (status != Z_STREAM_END) error(_("corrupt loose object '%s'"), oid_to_hex(oid)); else if (stream->avail_in) error(_("garbage at end of loose object '%s'"), oid_to_hex(oid)); + else + return buf; + free(buf); return NULL; } From 1cb2f293f5a594fd5dee8400213bd2f395fbd2bf Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 25 Feb 2025 01:34:21 -0500 Subject: [PATCH 030/775] unpack_loose_rest(): rewrite return handling for clarity We have a pattern like: if (error1) ...handle error 1... else if (error2) ...handle error 2... 
else ...return buf... ...free buf and return NULL... This is a little subtle because it is the return in the success block that lets us skip the common error handling. Rewrite this instead to free the buffer in each error path, marking it as NULL, and then all code paths can use the common return. This should make the logic a bit easier to follow. It does mean duplicating the buf cleanup for errors, but it's a single line. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object-file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index 9f6e8504fb8044..e463b4bad3e4bc 100644 --- a/object-file.c +++ b/object-file.c @@ -1348,16 +1348,16 @@ static void *unpack_loose_rest(git_zstream *stream, } } - if (status != Z_STREAM_END) + if (status != Z_STREAM_END) { error(_("corrupt loose object '%s'"), oid_to_hex(oid)); - else if (stream->avail_in) + FREE_AND_NULL(buf); + } else if (stream->avail_in) { error(_("garbage at end of loose object '%s'"), oid_to_hex(oid)); - else - return buf; + FREE_AND_NULL(buf); + } - free(buf); - return NULL; + return buf; } /* From 320f2061b63b0157ea8b21362ed12e4ce90258e2 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:05:55 +0800 Subject: [PATCH 031/775] t0602: use subshell to ensure working directory unchanged Every test executes the command "cd repo" as its first step, but never executes the command "cd .." to restore the working directory. However, simply adding "cd .." at the end would not be a good idea either, because if any command fails between "cd repo" and "cd ..", the "cd .." will never be reached and we cannot correctly restore the working directory. Let's use a subshell to ensure that the current working directory is always restored to the correct path. 
Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- t/t0602-reffiles-fsck.sh | 967 ++++++++++++++++++++------------------- 1 file changed, 494 insertions(+), 473 deletions(-) diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index d4a08b823b7db7..cf7a202d0ded7c 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -14,222 +14,229 @@ test_expect_success 'ref name should be checked' ' git init repo && branch_dir_prefix=.git/refs/heads && tag_dir_prefix=.git/refs/tags && - cd repo && - - git commit --allow-empty -m initial && - git checkout -b default-branch && - git tag default-tag && - git tag multi_hierarchy/default-tag && - - cp $branch_dir_prefix/default-branch $branch_dir_prefix/@ && - git refs verify 2>err && - test_must_be_empty err && - rm $branch_dir_prefix/@ && - - cp $tag_dir_prefix/default-tag $tag_dir_prefix/tag-1.lock && - git refs verify 2>err && - rm $tag_dir_prefix/tag-1.lock && - test_must_be_empty err && - - cp $tag_dir_prefix/default-tag $tag_dir_prefix/.lock && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: refs/tags/.lock: badRefName: invalid refname format - EOF - rm $tag_dir_prefix/.lock && - test_cmp expect err && - - for refname in ".refname-starts-with-dot" "~refname-has-stride" - do - cp $branch_dir_prefix/default-branch "$branch_dir_prefix/$refname" && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: refs/heads/$refname: badRefName: invalid refname format - EOF - rm "$branch_dir_prefix/$refname" && - test_cmp expect err || return 1 - done && + ( + cd repo && - for refname in ".refname-starts-with-dot" "~refname-has-stride" - do - cp $tag_dir_prefix/default-tag "$tag_dir_prefix/$refname" && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: refs/tags/$refname: badRefName: invalid refname format - EOF - rm "$tag_dir_prefix/$refname" && - test_cmp expect err || return 
1 - done && + git commit --allow-empty -m initial && + git checkout -b default-branch && + git tag default-tag && + git tag multi_hierarchy/default-tag && - for refname in ".refname-starts-with-dot" "~refname-has-stride" - do - cp $tag_dir_prefix/multi_hierarchy/default-tag "$tag_dir_prefix/multi_hierarchy/$refname" && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: refs/tags/multi_hierarchy/$refname: badRefName: invalid refname format - EOF - rm "$tag_dir_prefix/multi_hierarchy/$refname" && - test_cmp expect err || return 1 - done && - - for refname in ".refname-starts-with-dot" "~refname-has-stride" - do - mkdir "$branch_dir_prefix/$refname" && - cp $branch_dir_prefix/default-branch "$branch_dir_prefix/$refname/default-branch" && + cp $branch_dir_prefix/default-branch $branch_dir_prefix/@ && + git refs verify 2>err && + test_must_be_empty err && + rm $branch_dir_prefix/@ && + + cp $tag_dir_prefix/default-tag $tag_dir_prefix/tag-1.lock && + git refs verify 2>err && + rm $tag_dir_prefix/tag-1.lock && + test_must_be_empty err && + + cp $tag_dir_prefix/default-tag $tag_dir_prefix/.lock && test_must_fail git refs verify 2>err && cat >expect <<-EOF && - error: refs/heads/$refname/default-branch: badRefName: invalid refname format + error: refs/tags/.lock: badRefName: invalid refname format EOF - rm -r "$branch_dir_prefix/$refname" && - test_cmp expect err || return 1 - done + rm $tag_dir_prefix/.lock && + test_cmp expect err && + + for refname in ".refname-starts-with-dot" "~refname-has-stride" + do + cp $branch_dir_prefix/default-branch "$branch_dir_prefix/$refname" && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/$refname: badRefName: invalid refname format + EOF + rm "$branch_dir_prefix/$refname" && + test_cmp expect err || return 1 + done && + + for refname in ".refname-starts-with-dot" "~refname-has-stride" + do + cp $tag_dir_prefix/default-tag "$tag_dir_prefix/$refname" && + test_must_fail git 
refs verify 2>err && + cat >expect <<-EOF && + error: refs/tags/$refname: badRefName: invalid refname format + EOF + rm "$tag_dir_prefix/$refname" && + test_cmp expect err || return 1 + done && + + for refname in ".refname-starts-with-dot" "~refname-has-stride" + do + cp $tag_dir_prefix/multi_hierarchy/default-tag "$tag_dir_prefix/multi_hierarchy/$refname" && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/tags/multi_hierarchy/$refname: badRefName: invalid refname format + EOF + rm "$tag_dir_prefix/multi_hierarchy/$refname" && + test_cmp expect err || return 1 + done && + + for refname in ".refname-starts-with-dot" "~refname-has-stride" + do + mkdir "$branch_dir_prefix/$refname" && + cp $branch_dir_prefix/default-branch "$branch_dir_prefix/$refname/default-branch" && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/$refname/default-branch: badRefName: invalid refname format + EOF + rm -r "$branch_dir_prefix/$refname" && + test_cmp expect err || return 1 + done + ) ' test_expect_success 'ref name check should be adapted into fsck messages' ' test_when_finished "rm -rf repo" && git init repo && branch_dir_prefix=.git/refs/heads && - cd repo && - git commit --allow-empty -m initial && - git checkout -b branch-1 && - - cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 && - git -c fsck.badRefName=warn refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/.branch-1: badRefName: invalid refname format - EOF - rm $branch_dir_prefix/.branch-1 && - test_cmp expect err && - - cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 && - git -c fsck.badRefName=ignore refs verify 2>err && - test_must_be_empty err + ( + cd repo && + git commit --allow-empty -m initial && + git checkout -b branch-1 && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 && + git -c fsck.badRefName=warn refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/.branch-1: badRefName: 
invalid refname format + EOF + rm $branch_dir_prefix/.branch-1 && + test_cmp expect err && + + cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 && + git -c fsck.badRefName=ignore refs verify 2>err && + test_must_be_empty err + ) ' test_expect_success 'ref name check should work for multiple worktrees' ' test_when_finished "rm -rf repo" && git init repo && - - cd repo && - test_commit initial && - git checkout -b branch-1 && - test_commit second && - git checkout -b branch-2 && - test_commit third && - git checkout -b branch-3 && - git worktree add ./worktree-1 branch-1 && - git worktree add ./worktree-2 branch-2 && - worktree1_refdir_prefix=.git/worktrees/worktree-1/refs/worktree && - worktree2_refdir_prefix=.git/worktrees/worktree-2/refs/worktree && - - ( - cd worktree-1 && - git update-ref refs/worktree/branch-4 refs/heads/branch-3 - ) && ( - cd worktree-2 && - git update-ref refs/worktree/branch-4 refs/heads/branch-3 - ) && - - cp $worktree1_refdir_prefix/branch-4 $worktree1_refdir_prefix/'\'' branch-5'\'' && - cp $worktree2_refdir_prefix/branch-4 $worktree2_refdir_prefix/'\''~branch-6'\'' && - - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: worktrees/worktree-1/refs/worktree/ branch-5: badRefName: invalid refname format - error: worktrees/worktree-2/refs/worktree/~branch-6: badRefName: invalid refname format - EOF - sort err >sorted_err && - test_cmp expect sorted_err && - - for worktree in "worktree-1" "worktree-2" - do + cd repo && + test_commit initial && + git checkout -b branch-1 && + test_commit second && + git checkout -b branch-2 && + test_commit third && + git checkout -b branch-3 && + git worktree add ./worktree-1 branch-1 && + git worktree add ./worktree-2 branch-2 && + worktree1_refdir_prefix=.git/worktrees/worktree-1/refs/worktree && + worktree2_refdir_prefix=.git/worktrees/worktree-2/refs/worktree && + ( - cd $worktree && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: 
worktrees/worktree-1/refs/worktree/ branch-5: badRefName: invalid refname format - error: worktrees/worktree-2/refs/worktree/~branch-6: badRefName: invalid refname format - EOF - sort err >sorted_err && - test_cmp expect sorted_err || return 1 - ) - done + cd worktree-1 && + git update-ref refs/worktree/branch-4 refs/heads/branch-3 + ) && + ( + cd worktree-2 && + git update-ref refs/worktree/branch-4 refs/heads/branch-3 + ) && + + cp $worktree1_refdir_prefix/branch-4 $worktree1_refdir_prefix/'\'' branch-5'\'' && + cp $worktree2_refdir_prefix/branch-4 $worktree2_refdir_prefix/'\''~branch-6'\'' && + + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: worktrees/worktree-1/refs/worktree/ branch-5: badRefName: invalid refname format + error: worktrees/worktree-2/refs/worktree/~branch-6: badRefName: invalid refname format + EOF + sort err >sorted_err && + test_cmp expect sorted_err && + + for worktree in "worktree-1" "worktree-2" + do + ( + cd $worktree && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: worktrees/worktree-1/refs/worktree/ branch-5: badRefName: invalid refname format + error: worktrees/worktree-2/refs/worktree/~branch-6: badRefName: invalid refname format + EOF + sort err >sorted_err && + test_cmp expect sorted_err || return 1 + ) + done + ) ' test_expect_success 'regular ref content should be checked (individual)' ' test_when_finished "rm -rf repo" && git init repo && branch_dir_prefix=.git/refs/heads && - cd repo && - test_commit default && - mkdir -p "$branch_dir_prefix/a/b" && + ( + cd repo && + test_commit default && + mkdir -p "$branch_dir_prefix/a/b" && - git refs verify 2>err && - test_must_be_empty err && + git refs verify 2>err && + test_must_be_empty err && - for bad_content in "$(git rev-parse main)x" "xfsazqfxcadas" "Xfsazqfxcadas" - do - printf "%s" $bad_content >$branch_dir_prefix/branch-bad && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: refs/heads/branch-bad: 
badRefContent: $bad_content - EOF - rm $branch_dir_prefix/branch-bad && - test_cmp expect err || return 1 - done && + for bad_content in "$(git rev-parse main)x" "xfsazqfxcadas" "Xfsazqfxcadas" + do + printf "%s" $bad_content >$branch_dir_prefix/branch-bad && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/branch-bad: badRefContent: $bad_content + EOF + rm $branch_dir_prefix/branch-bad && + test_cmp expect err || return 1 + done && - for bad_content in "$(git rev-parse main)x" "xfsazqfxcadas" "Xfsazqfxcadas" - do - printf "%s" $bad_content >$branch_dir_prefix/a/b/branch-bad && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: refs/heads/a/b/branch-bad: badRefContent: $bad_content - EOF - rm $branch_dir_prefix/a/b/branch-bad && - test_cmp expect err || return 1 - done && - - printf "%s" "$(git rev-parse main)" >$branch_dir_prefix/branch-no-newline && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end - EOF - rm $branch_dir_prefix/branch-no-newline && - test_cmp expect err && - - for trailing_content in " garbage" " more garbage" - do - printf "%s" "$(git rev-parse main)$trailing_content" >$branch_dir_prefix/branch-garbage && + for bad_content in "$(git rev-parse main)x" "xfsazqfxcadas" "Xfsazqfxcadas" + do + printf "%s" $bad_content >$branch_dir_prefix/a/b/branch-bad && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/a/b/branch-bad: badRefContent: $bad_content + EOF + rm $branch_dir_prefix/a/b/branch-bad && + test_cmp expect err || return 1 + done && + + printf "%s" "$(git rev-parse main)" >$branch_dir_prefix/branch-no-newline && git refs verify 2>err && cat >expect <<-EOF && - warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\''$trailing_content'\'' + warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end EOF - rm 
$branch_dir_prefix/branch-garbage && - test_cmp expect err || return 1 - done && + rm $branch_dir_prefix/branch-no-newline && + test_cmp expect err && - printf "%s\n\n\n" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage-special && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-garbage-special: trailingRefContent: has trailing garbage: '\'' + for trailing_content in " garbage" " more garbage" + do + printf "%s" "$(git rev-parse main)$trailing_content" >$branch_dir_prefix/branch-garbage && + git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\''$trailing_content'\'' + EOF + rm $branch_dir_prefix/branch-garbage && + test_cmp expect err || return 1 + done && + printf "%s\n\n\n" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage-special && + git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-garbage-special: trailingRefContent: has trailing garbage: '\'' - '\'' - EOF - rm $branch_dir_prefix/branch-garbage-special && - test_cmp expect err && - printf "%s\n\n\n garbage" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage-special && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-garbage-special: trailingRefContent: has trailing garbage: '\'' + '\'' + EOF + rm $branch_dir_prefix/branch-garbage-special && + test_cmp expect err && + + printf "%s\n\n\n garbage" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage-special && + git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-garbage-special: trailingRefContent: has trailing garbage: '\'' - garbage'\'' - EOF - rm $branch_dir_prefix/branch-garbage-special && - test_cmp expect err + garbage'\'' + EOF + rm $branch_dir_prefix/branch-garbage-special && + test_cmp expect err + ) ' test_expect_success 'regular ref content should be checked (aggregate)' ' @@ -237,99 +244,103 @@ test_expect_success 'regular 
ref content should be checked (aggregate)' ' git init repo && branch_dir_prefix=.git/refs/heads && tag_dir_prefix=.git/refs/tags && - cd repo && - test_commit default && - mkdir -p "$branch_dir_prefix/a/b" && - - bad_content_1=$(git rev-parse main)x && - bad_content_2=xfsazqfxcadas && - bad_content_3=Xfsazqfxcadas && - printf "%s" $bad_content_1 >$tag_dir_prefix/tag-bad-1 && - printf "%s" $bad_content_2 >$tag_dir_prefix/tag-bad-2 && - printf "%s" $bad_content_3 >$branch_dir_prefix/a/b/branch-bad && - printf "%s" "$(git rev-parse main)" >$branch_dir_prefix/branch-no-newline && - printf "%s garbage" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage && - - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: refs/heads/a/b/branch-bad: badRefContent: $bad_content_3 - error: refs/tags/tag-bad-1: badRefContent: $bad_content_1 - error: refs/tags/tag-bad-2: badRefContent: $bad_content_2 - warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\'' garbage'\'' - warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end - EOF - sort err >sorted_err && - test_cmp expect sorted_err + ( + cd repo && + test_commit default && + mkdir -p "$branch_dir_prefix/a/b" && + + bad_content_1=$(git rev-parse main)x && + bad_content_2=xfsazqfxcadas && + bad_content_3=Xfsazqfxcadas && + printf "%s" $bad_content_1 >$tag_dir_prefix/tag-bad-1 && + printf "%s" $bad_content_2 >$tag_dir_prefix/tag-bad-2 && + printf "%s" $bad_content_3 >$branch_dir_prefix/a/b/branch-bad && + printf "%s" "$(git rev-parse main)" >$branch_dir_prefix/branch-no-newline && + printf "%s garbage" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage && + + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/a/b/branch-bad: badRefContent: $bad_content_3 + error: refs/tags/tag-bad-1: badRefContent: $bad_content_1 + error: refs/tags/tag-bad-2: badRefContent: $bad_content_2 + warning: refs/heads/branch-garbage: 
trailingRefContent: has trailing garbage: '\'' garbage'\'' + warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end + EOF + sort err >sorted_err && + test_cmp expect sorted_err + ) ' test_expect_success 'textual symref content should be checked (individual)' ' test_when_finished "rm -rf repo" && git init repo && branch_dir_prefix=.git/refs/heads && - cd repo && - test_commit default && - mkdir -p "$branch_dir_prefix/a/b" && + ( + cd repo && + test_commit default && + mkdir -p "$branch_dir_prefix/a/b" && + + for good_referent in "refs/heads/branch" "HEAD" + do + printf "ref: %s\n" $good_referent >$branch_dir_prefix/branch-good && + git refs verify 2>err && + rm $branch_dir_prefix/branch-good && + test_must_be_empty err || return 1 + done && + + for bad_referent in "refs/heads/.branch" "refs/heads/~branch" "refs/heads/?branch" + do + printf "ref: %s\n" $bad_referent >$branch_dir_prefix/branch-bad && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/branch-bad: badReferentName: points to invalid refname '\''$bad_referent'\'' + EOF + rm $branch_dir_prefix/branch-bad && + test_cmp expect err || return 1 + done && - for good_referent in "refs/heads/branch" "HEAD" - do - printf "ref: %s\n" $good_referent >$branch_dir_prefix/branch-good && + printf "ref: refs/heads/branch" >$branch_dir_prefix/branch-no-newline && git refs verify 2>err && - rm $branch_dir_prefix/branch-good && - test_must_be_empty err || return 1 - done && + cat >expect <<-EOF && + warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end + EOF + rm $branch_dir_prefix/branch-no-newline && + test_cmp expect err && - for bad_referent in "refs/heads/.branch" "refs/heads/~branch" "refs/heads/?branch" - do - printf "ref: %s\n" $bad_referent >$branch_dir_prefix/branch-bad && - test_must_fail git refs verify 2>err && + printf "ref: refs/heads/branch " >$branch_dir_prefix/a/b/branch-trailing-1 && + git refs verify 2>err && + cat 
>expect <<-EOF && + warning: refs/heads/a/b/branch-trailing-1: refMissingNewline: misses LF at the end + warning: refs/heads/a/b/branch-trailing-1: trailingRefContent: has trailing whitespaces or newlines + EOF + rm $branch_dir_prefix/a/b/branch-trailing-1 && + test_cmp expect err && + + printf "ref: refs/heads/branch\n\n" >$branch_dir_prefix/a/b/branch-trailing-2 && + git refs verify 2>err && cat >expect <<-EOF && - error: refs/heads/branch-bad: badReferentName: points to invalid refname '\''$bad_referent'\'' + warning: refs/heads/a/b/branch-trailing-2: trailingRefContent: has trailing whitespaces or newlines EOF - rm $branch_dir_prefix/branch-bad && - test_cmp expect err || return 1 - done && - - printf "ref: refs/heads/branch" >$branch_dir_prefix/branch-no-newline && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end - EOF - rm $branch_dir_prefix/branch-no-newline && - test_cmp expect err && - - printf "ref: refs/heads/branch " >$branch_dir_prefix/a/b/branch-trailing-1 && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/a/b/branch-trailing-1: refMissingNewline: misses LF at the end - warning: refs/heads/a/b/branch-trailing-1: trailingRefContent: has trailing whitespaces or newlines - EOF - rm $branch_dir_prefix/a/b/branch-trailing-1 && - test_cmp expect err && - - printf "ref: refs/heads/branch\n\n" >$branch_dir_prefix/a/b/branch-trailing-2 && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/a/b/branch-trailing-2: trailingRefContent: has trailing whitespaces or newlines - EOF - rm $branch_dir_prefix/a/b/branch-trailing-2 && - test_cmp expect err && - - printf "ref: refs/heads/branch \n" >$branch_dir_prefix/a/b/branch-trailing-3 && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/a/b/branch-trailing-3: trailingRefContent: has trailing whitespaces or newlines - EOF - rm $branch_dir_prefix/a/b/branch-trailing-3 && 
- test_cmp expect err && - - printf "ref: refs/heads/branch \n " >$branch_dir_prefix/a/b/branch-complicated && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/a/b/branch-complicated: refMissingNewline: misses LF at the end - warning: refs/heads/a/b/branch-complicated: trailingRefContent: has trailing whitespaces or newlines - EOF - rm $branch_dir_prefix/a/b/branch-complicated && - test_cmp expect err + rm $branch_dir_prefix/a/b/branch-trailing-2 && + test_cmp expect err && + + printf "ref: refs/heads/branch \n" >$branch_dir_prefix/a/b/branch-trailing-3 && + git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/a/b/branch-trailing-3: trailingRefContent: has trailing whitespaces or newlines + EOF + rm $branch_dir_prefix/a/b/branch-trailing-3 && + test_cmp expect err && + + printf "ref: refs/heads/branch \n " >$branch_dir_prefix/a/b/branch-complicated && + git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/a/b/branch-complicated: refMissingNewline: misses LF at the end + warning: refs/heads/a/b/branch-complicated: trailingRefContent: has trailing whitespaces or newlines + EOF + rm $branch_dir_prefix/a/b/branch-complicated && + test_cmp expect err + ) ' test_expect_success 'textual symref content should be checked (aggregate)' ' @@ -337,32 +348,34 @@ test_expect_success 'textual symref content should be checked (aggregate)' ' git init repo && branch_dir_prefix=.git/refs/heads && tag_dir_prefix=.git/refs/tags && - cd repo && - test_commit default && - mkdir -p "$branch_dir_prefix/a/b" && - - printf "ref: refs/heads/branch\n" >$branch_dir_prefix/branch-good && - printf "ref: HEAD\n" >$branch_dir_prefix/branch-head && - printf "ref: refs/heads/branch" >$branch_dir_prefix/branch-no-newline-1 && - printf "ref: refs/heads/branch " >$branch_dir_prefix/a/b/branch-trailing-1 && - printf "ref: refs/heads/branch\n\n" >$branch_dir_prefix/a/b/branch-trailing-2 && - printf "ref: refs/heads/branch \n" 
>$branch_dir_prefix/a/b/branch-trailing-3 && - printf "ref: refs/heads/branch \n " >$branch_dir_prefix/a/b/branch-complicated && - printf "ref: refs/heads/.branch\n" >$branch_dir_prefix/branch-bad-1 && - - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - error: refs/heads/branch-bad-1: badReferentName: points to invalid refname '\''refs/heads/.branch'\'' - warning: refs/heads/a/b/branch-complicated: refMissingNewline: misses LF at the end - warning: refs/heads/a/b/branch-complicated: trailingRefContent: has trailing whitespaces or newlines - warning: refs/heads/a/b/branch-trailing-1: refMissingNewline: misses LF at the end - warning: refs/heads/a/b/branch-trailing-1: trailingRefContent: has trailing whitespaces or newlines - warning: refs/heads/a/b/branch-trailing-2: trailingRefContent: has trailing whitespaces or newlines - warning: refs/heads/a/b/branch-trailing-3: trailingRefContent: has trailing whitespaces or newlines - warning: refs/heads/branch-no-newline-1: refMissingNewline: misses LF at the end - EOF - sort err >sorted_err && - test_cmp expect sorted_err + ( + cd repo && + test_commit default && + mkdir -p "$branch_dir_prefix/a/b" && + + printf "ref: refs/heads/branch\n" >$branch_dir_prefix/branch-good && + printf "ref: HEAD\n" >$branch_dir_prefix/branch-head && + printf "ref: refs/heads/branch" >$branch_dir_prefix/branch-no-newline-1 && + printf "ref: refs/heads/branch " >$branch_dir_prefix/a/b/branch-trailing-1 && + printf "ref: refs/heads/branch\n\n" >$branch_dir_prefix/a/b/branch-trailing-2 && + printf "ref: refs/heads/branch \n" >$branch_dir_prefix/a/b/branch-trailing-3 && + printf "ref: refs/heads/branch \n " >$branch_dir_prefix/a/b/branch-complicated && + printf "ref: refs/heads/.branch\n" >$branch_dir_prefix/branch-bad-1 && + + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/branch-bad-1: badReferentName: points to invalid refname '\''refs/heads/.branch'\'' + warning: 
refs/heads/a/b/branch-complicated: refMissingNewline: misses LF at the end + warning: refs/heads/a/b/branch-complicated: trailingRefContent: has trailing whitespaces or newlines + warning: refs/heads/a/b/branch-trailing-1: refMissingNewline: misses LF at the end + warning: refs/heads/a/b/branch-trailing-1: trailingRefContent: has trailing whitespaces or newlines + warning: refs/heads/a/b/branch-trailing-2: trailingRefContent: has trailing whitespaces or newlines + warning: refs/heads/a/b/branch-trailing-3: trailingRefContent: has trailing whitespaces or newlines + warning: refs/heads/branch-no-newline-1: refMissingNewline: misses LF at the end + EOF + sort err >sorted_err && + test_cmp expect sorted_err + ) ' test_expect_success 'the target of the textual symref should be checked' ' @@ -370,28 +383,30 @@ test_expect_success 'the target of the textual symref should be checked' ' git init repo && branch_dir_prefix=.git/refs/heads && tag_dir_prefix=.git/refs/tags && - cd repo && - test_commit default && - mkdir -p "$branch_dir_prefix/a/b" && - - for good_referent in "refs/heads/branch" "HEAD" "refs/tags/tag" - do - printf "ref: %s\n" $good_referent >$branch_dir_prefix/branch-good && - git refs verify 2>err && - rm $branch_dir_prefix/branch-good && - test_must_be_empty err || return 1 - done && - - for nonref_referent in "refs-back/heads/branch" "refs-back/tags/tag" "reflogs/refs/heads/branch" - do - printf "ref: %s\n" $nonref_referent >$branch_dir_prefix/branch-bad-1 && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-bad-1: symrefTargetIsNotARef: points to non-ref target '\''$nonref_referent'\'' - EOF - rm $branch_dir_prefix/branch-bad-1 && - test_cmp expect err || return 1 - done + ( + cd repo && + test_commit default && + mkdir -p "$branch_dir_prefix/a/b" && + + for good_referent in "refs/heads/branch" "HEAD" "refs/tags/tag" + do + printf "ref: %s\n" $good_referent >$branch_dir_prefix/branch-good && + git refs verify 2>err && + rm 
$branch_dir_prefix/branch-good && + test_must_be_empty err || return 1 + done && + + for nonref_referent in "refs-back/heads/branch" "refs-back/tags/tag" "reflogs/refs/heads/branch" + do + printf "ref: %s\n" $nonref_referent >$branch_dir_prefix/branch-bad-1 && + git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-bad-1: symrefTargetIsNotARef: points to non-ref target '\''$nonref_referent'\'' + EOF + rm $branch_dir_prefix/branch-bad-1 && + test_cmp expect err || return 1 + done + ) ' test_expect_success SYMLINKS 'symlink symref content should be checked' ' @@ -399,201 +414,207 @@ test_expect_success SYMLINKS 'symlink symref content should be checked' ' git init repo && branch_dir_prefix=.git/refs/heads && tag_dir_prefix=.git/refs/tags && - cd repo && - test_commit default && - mkdir -p "$branch_dir_prefix/a/b" && - - ln -sf ./main $branch_dir_prefix/branch-symbolic-good && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-symbolic-good: symlinkRef: use deprecated symbolic link for symref - EOF - rm $branch_dir_prefix/branch-symbolic-good && - test_cmp expect err && - - ln -sf ../../logs/branch-escape $branch_dir_prefix/branch-symbolic && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-symbolic: symlinkRef: use deprecated symbolic link for symref - warning: refs/heads/branch-symbolic: symrefTargetIsNotARef: points to non-ref target '\''logs/branch-escape'\'' - EOF - rm $branch_dir_prefix/branch-symbolic && - test_cmp expect err && - - ln -sf ./"branch " $branch_dir_prefix/branch-symbolic-bad && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-symbolic-bad: symlinkRef: use deprecated symbolic link for symref - error: refs/heads/branch-symbolic-bad: badReferentName: points to invalid refname '\''refs/heads/branch '\'' - EOF - rm $branch_dir_prefix/branch-symbolic-bad && - test_cmp expect err && - - ln -sf ./".tag" 
$tag_dir_prefix/tag-symbolic-1 && - test_must_fail git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/tags/tag-symbolic-1: symlinkRef: use deprecated symbolic link for symref - error: refs/tags/tag-symbolic-1: badReferentName: points to invalid refname '\''refs/tags/.tag'\'' - EOF - rm $tag_dir_prefix/tag-symbolic-1 && - test_cmp expect err + ( + cd repo && + test_commit default && + mkdir -p "$branch_dir_prefix/a/b" && + + ln -sf ./main $branch_dir_prefix/branch-symbolic-good && + git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-symbolic-good: symlinkRef: use deprecated symbolic link for symref + EOF + rm $branch_dir_prefix/branch-symbolic-good && + test_cmp expect err && + + ln -sf ../../logs/branch-escape $branch_dir_prefix/branch-symbolic && + git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-symbolic: symlinkRef: use deprecated symbolic link for symref + warning: refs/heads/branch-symbolic: symrefTargetIsNotARef: points to non-ref target '\''logs/branch-escape'\'' + EOF + rm $branch_dir_prefix/branch-symbolic && + test_cmp expect err && + + ln -sf ./"branch " $branch_dir_prefix/branch-symbolic-bad && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-symbolic-bad: symlinkRef: use deprecated symbolic link for symref + error: refs/heads/branch-symbolic-bad: badReferentName: points to invalid refname '\''refs/heads/branch '\'' + EOF + rm $branch_dir_prefix/branch-symbolic-bad && + test_cmp expect err && + + ln -sf ./".tag" $tag_dir_prefix/tag-symbolic-1 && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + warning: refs/tags/tag-symbolic-1: symlinkRef: use deprecated symbolic link for symref + error: refs/tags/tag-symbolic-1: badReferentName: points to invalid refname '\''refs/tags/.tag'\'' + EOF + rm $tag_dir_prefix/tag-symbolic-1 && + test_cmp expect err + ) ' test_expect_success SYMLINKS 'symlink symref content should be checked 
(worktree)' ' test_when_finished "rm -rf repo" && git init repo && - cd repo && - test_commit default && - git branch branch-1 && - git branch branch-2 && - git branch branch-3 && - git worktree add ./worktree-1 branch-2 && - git worktree add ./worktree-2 branch-3 && - main_worktree_refdir_prefix=.git/refs/heads && - worktree1_refdir_prefix=.git/worktrees/worktree-1/refs/worktree && - worktree2_refdir_prefix=.git/worktrees/worktree-2/refs/worktree && - ( - cd worktree-1 && - git update-ref refs/worktree/branch-4 refs/heads/branch-1 - ) && - ( - cd worktree-2 && - git update-ref refs/worktree/branch-4 refs/heads/branch-1 - ) && - - ln -sf ../../../../refs/heads/good-branch $worktree1_refdir_prefix/branch-symbolic-good && - git refs verify 2>err && - cat >expect <<-EOF && - warning: worktrees/worktree-1/refs/worktree/branch-symbolic-good: symlinkRef: use deprecated symbolic link for symref - EOF - rm $worktree1_refdir_prefix/branch-symbolic-good && - test_cmp expect err && - - ln -sf ../../../../worktrees/worktree-1/good-branch $worktree2_refdir_prefix/branch-symbolic-good && - git refs verify 2>err && - cat >expect <<-EOF && - warning: worktrees/worktree-2/refs/worktree/branch-symbolic-good: symlinkRef: use deprecated symbolic link for symref - EOF - rm $worktree2_refdir_prefix/branch-symbolic-good && - test_cmp expect err && - - ln -sf ../../worktrees/worktree-2/good-branch $main_worktree_refdir_prefix/branch-symbolic-good && - git refs verify 2>err && - cat >expect <<-EOF && - warning: refs/heads/branch-symbolic-good: symlinkRef: use deprecated symbolic link for symref - EOF - rm $main_worktree_refdir_prefix/branch-symbolic-good && - test_cmp expect err && - - ln -sf ../../../../logs/branch-escape $worktree1_refdir_prefix/branch-symbolic && - git refs verify 2>err && - cat >expect <<-EOF && - warning: worktrees/worktree-1/refs/worktree/branch-symbolic: symlinkRef: use deprecated symbolic link for symref - warning: 
worktrees/worktree-1/refs/worktree/branch-symbolic: symrefTargetIsNotARef: points to non-ref target '\''logs/branch-escape'\'' - EOF - rm $worktree1_refdir_prefix/branch-symbolic && - test_cmp expect err && - - for bad_referent_name in ".tag" "branch " - do - ln -sf ./"$bad_referent_name" $worktree1_refdir_prefix/bad-symbolic && - test_must_fail git refs verify 2>err && + cd repo && + test_commit default && + git branch branch-1 && + git branch branch-2 && + git branch branch-3 && + git worktree add ./worktree-1 branch-2 && + git worktree add ./worktree-2 branch-3 && + main_worktree_refdir_prefix=.git/refs/heads && + worktree1_refdir_prefix=.git/worktrees/worktree-1/refs/worktree && + worktree2_refdir_prefix=.git/worktrees/worktree-2/refs/worktree && + + ( + cd worktree-1 && + git update-ref refs/worktree/branch-4 refs/heads/branch-1 + ) && + ( + cd worktree-2 && + git update-ref refs/worktree/branch-4 refs/heads/branch-1 + ) && + + ln -sf ../../../../refs/heads/good-branch $worktree1_refdir_prefix/branch-symbolic-good && + git refs verify 2>err && cat >expect <<-EOF && - warning: worktrees/worktree-1/refs/worktree/bad-symbolic: symlinkRef: use deprecated symbolic link for symref - error: worktrees/worktree-1/refs/worktree/bad-symbolic: badReferentName: points to invalid refname '\''worktrees/worktree-1/refs/worktree/$bad_referent_name'\'' + warning: worktrees/worktree-1/refs/worktree/branch-symbolic-good: symlinkRef: use deprecated symbolic link for symref EOF - rm $worktree1_refdir_prefix/bad-symbolic && + rm $worktree1_refdir_prefix/branch-symbolic-good && test_cmp expect err && - ln -sf ../../../../refs/heads/"$bad_referent_name" $worktree1_refdir_prefix/bad-symbolic && - test_must_fail git refs verify 2>err && + ln -sf ../../../../worktrees/worktree-1/good-branch $worktree2_refdir_prefix/branch-symbolic-good && + git refs verify 2>err && cat >expect <<-EOF && - warning: worktrees/worktree-1/refs/worktree/bad-symbolic: symlinkRef: use deprecated symbolic link 
for symref - error: worktrees/worktree-1/refs/worktree/bad-symbolic: badReferentName: points to invalid refname '\''refs/heads/$bad_referent_name'\'' + warning: worktrees/worktree-2/refs/worktree/branch-symbolic-good: symlinkRef: use deprecated symbolic link for symref EOF - rm $worktree1_refdir_prefix/bad-symbolic && + rm $worktree2_refdir_prefix/branch-symbolic-good && test_cmp expect err && - ln -sf ./"$bad_referent_name" $worktree2_refdir_prefix/bad-symbolic && - test_must_fail git refs verify 2>err && + ln -sf ../../worktrees/worktree-2/good-branch $main_worktree_refdir_prefix/branch-symbolic-good && + git refs verify 2>err && cat >expect <<-EOF && - warning: worktrees/worktree-2/refs/worktree/bad-symbolic: symlinkRef: use deprecated symbolic link for symref - error: worktrees/worktree-2/refs/worktree/bad-symbolic: badReferentName: points to invalid refname '\''worktrees/worktree-2/refs/worktree/$bad_referent_name'\'' + warning: refs/heads/branch-symbolic-good: symlinkRef: use deprecated symbolic link for symref EOF - rm $worktree2_refdir_prefix/bad-symbolic && + rm $main_worktree_refdir_prefix/branch-symbolic-good && test_cmp expect err && - ln -sf ../../../../refs/heads/"$bad_referent_name" $worktree2_refdir_prefix/bad-symbolic && - test_must_fail git refs verify 2>err && + ln -sf ../../../../logs/branch-escape $worktree1_refdir_prefix/branch-symbolic && + git refs verify 2>err && cat >expect <<-EOF && - warning: worktrees/worktree-2/refs/worktree/bad-symbolic: symlinkRef: use deprecated symbolic link for symref - error: worktrees/worktree-2/refs/worktree/bad-symbolic: badReferentName: points to invalid refname '\''refs/heads/$bad_referent_name'\'' + warning: worktrees/worktree-1/refs/worktree/branch-symbolic: symlinkRef: use deprecated symbolic link for symref + warning: worktrees/worktree-1/refs/worktree/branch-symbolic: symrefTargetIsNotARef: points to non-ref target '\''logs/branch-escape'\'' EOF - rm $worktree2_refdir_prefix/bad-symbolic && - test_cmp 
expect err || return 1 - done + rm $worktree1_refdir_prefix/branch-symbolic && + test_cmp expect err && + + for bad_referent_name in ".tag" "branch " + do + ln -sf ./"$bad_referent_name" $worktree1_refdir_prefix/bad-symbolic && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + warning: worktrees/worktree-1/refs/worktree/bad-symbolic: symlinkRef: use deprecated symbolic link for symref + error: worktrees/worktree-1/refs/worktree/bad-symbolic: badReferentName: points to invalid refname '\''worktrees/worktree-1/refs/worktree/$bad_referent_name'\'' + EOF + rm $worktree1_refdir_prefix/bad-symbolic && + test_cmp expect err && + + ln -sf ../../../../refs/heads/"$bad_referent_name" $worktree1_refdir_prefix/bad-symbolic && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + warning: worktrees/worktree-1/refs/worktree/bad-symbolic: symlinkRef: use deprecated symbolic link for symref + error: worktrees/worktree-1/refs/worktree/bad-symbolic: badReferentName: points to invalid refname '\''refs/heads/$bad_referent_name'\'' + EOF + rm $worktree1_refdir_prefix/bad-symbolic && + test_cmp expect err && + + ln -sf ./"$bad_referent_name" $worktree2_refdir_prefix/bad-symbolic && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + warning: worktrees/worktree-2/refs/worktree/bad-symbolic: symlinkRef: use deprecated symbolic link for symref + error: worktrees/worktree-2/refs/worktree/bad-symbolic: badReferentName: points to invalid refname '\''worktrees/worktree-2/refs/worktree/$bad_referent_name'\'' + EOF + rm $worktree2_refdir_prefix/bad-symbolic && + test_cmp expect err && + + ln -sf ../../../../refs/heads/"$bad_referent_name" $worktree2_refdir_prefix/bad-symbolic && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + warning: worktrees/worktree-2/refs/worktree/bad-symbolic: symlinkRef: use deprecated symbolic link for symref + error: worktrees/worktree-2/refs/worktree/bad-symbolic: badReferentName: points to invalid 
refname '\''refs/heads/$bad_referent_name'\'' + EOF + rm $worktree2_refdir_prefix/bad-symbolic && + test_cmp expect err || return 1 + done + ) ' test_expect_success 'ref content checks should work with worktrees' ' test_when_finished "rm -rf repo" && git init repo && - cd repo && - test_commit default && - git branch branch-1 && - git branch branch-2 && - git branch branch-3 && - git worktree add ./worktree-1 branch-2 && - git worktree add ./worktree-2 branch-3 && - worktree1_refdir_prefix=.git/worktrees/worktree-1/refs/worktree && - worktree2_refdir_prefix=.git/worktrees/worktree-2/refs/worktree && - ( - cd worktree-1 && - git update-ref refs/worktree/branch-4 refs/heads/branch-1 - ) && - ( - cd worktree-2 && - git update-ref refs/worktree/branch-4 refs/heads/branch-1 - ) && + cd repo && + test_commit default && + git branch branch-1 && + git branch branch-2 && + git branch branch-3 && + git worktree add ./worktree-1 branch-2 && + git worktree add ./worktree-2 branch-3 && + worktree1_refdir_prefix=.git/worktrees/worktree-1/refs/worktree && + worktree2_refdir_prefix=.git/worktrees/worktree-2/refs/worktree && - for bad_content in "$(git rev-parse HEAD)x" "xfsazqfxcadas" "Xfsazqfxcadas" - do - printf "%s" $bad_content >$worktree1_refdir_prefix/bad-branch-1 && - test_must_fail git refs verify 2>err && + ( + cd worktree-1 && + git update-ref refs/worktree/branch-4 refs/heads/branch-1 + ) && + ( + cd worktree-2 && + git update-ref refs/worktree/branch-4 refs/heads/branch-1 + ) && + + for bad_content in "$(git rev-parse HEAD)x" "xfsazqfxcadas" "Xfsazqfxcadas" + do + printf "%s" $bad_content >$worktree1_refdir_prefix/bad-branch-1 && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: worktrees/worktree-1/refs/worktree/bad-branch-1: badRefContent: $bad_content + EOF + rm $worktree1_refdir_prefix/bad-branch-1 && + test_cmp expect err || return 1 + done && + + for bad_content in "$(git rev-parse HEAD)x" "xfsazqfxcadas" "Xfsazqfxcadas" + do + printf 
"%s" $bad_content >$worktree2_refdir_prefix/bad-branch-2 && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: worktrees/worktree-2/refs/worktree/bad-branch-2: badRefContent: $bad_content + EOF + rm $worktree2_refdir_prefix/bad-branch-2 && + test_cmp expect err || return 1 + done && + + printf "%s" "$(git rev-parse HEAD)" >$worktree1_refdir_prefix/branch-no-newline && + git refs verify 2>err && cat >expect <<-EOF && - error: worktrees/worktree-1/refs/worktree/bad-branch-1: badRefContent: $bad_content + warning: worktrees/worktree-1/refs/worktree/branch-no-newline: refMissingNewline: misses LF at the end EOF - rm $worktree1_refdir_prefix/bad-branch-1 && - test_cmp expect err || return 1 - done && + rm $worktree1_refdir_prefix/branch-no-newline && + test_cmp expect err && - for bad_content in "$(git rev-parse HEAD)x" "xfsazqfxcadas" "Xfsazqfxcadas" - do - printf "%s" $bad_content >$worktree2_refdir_prefix/bad-branch-2 && - test_must_fail git refs verify 2>err && + printf "%s garbage" "$(git rev-parse HEAD)" >$worktree1_refdir_prefix/branch-garbage && + git refs verify 2>err && cat >expect <<-EOF && - error: worktrees/worktree-2/refs/worktree/bad-branch-2: badRefContent: $bad_content + warning: worktrees/worktree-1/refs/worktree/branch-garbage: trailingRefContent: has trailing garbage: '\'' garbage'\'' EOF - rm $worktree2_refdir_prefix/bad-branch-2 && - test_cmp expect err || return 1 - done && - - printf "%s" "$(git rev-parse HEAD)" >$worktree1_refdir_prefix/branch-no-newline && - git refs verify 2>err && - cat >expect <<-EOF && - warning: worktrees/worktree-1/refs/worktree/branch-no-newline: refMissingNewline: misses LF at the end - EOF - rm $worktree1_refdir_prefix/branch-no-newline && - test_cmp expect err && - - printf "%s garbage" "$(git rev-parse HEAD)" >$worktree1_refdir_prefix/branch-garbage && - git refs verify 2>err && - cat >expect <<-EOF && - warning: worktrees/worktree-1/refs/worktree/branch-garbage: trailingRefContent: has 
trailing garbage: '\'' garbage'\'' - EOF - rm $worktree1_refdir_prefix/branch-garbage && - test_cmp expect err + rm $worktree1_refdir_prefix/branch-garbage && + test_cmp expect err + ) ' test_done From fdf3820b7ef69dcf887bd86565e2442f89edc7c0 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:06:06 +0800 Subject: [PATCH 032/775] builtin/refs: get worktrees without reading head information In "packed-backend.c", there are some functions such as "create_snapshot" and "next_record" which would check the correctness of the content of the "packed-ref" file. When anything is bad, the program will die. It may seem that we have nothing relevant to above feature, because we are going to read and parse the raw "packed-ref" file without creating the snapshot and using the ref iterator to check the consistency. However, when using "get_worktrees" in "builtin/refs", we would parse the "HEAD" information. If the referent of the "HEAD" is inside the "packed-ref", we will call "create_snapshot" function to parse the "packed-ref" to get the information. No matter whether the entry of "HEAD" in "packed-ref" is correct, "create_snapshot" would call "verify_buffer_safe" to check whether there is a newline in the last line of the file. If not, the program will die. Although this behavior has no harm for the program, it will short-circuit the program. When the users execute "git refs verify" or "git fsck", we should avoid reading the head information, which may execute the read operation in packed backend with stricter checks to die the program. Instead, we should continue to check other parts of the "packed-refs" file completely. Fortunately, in 465a22b338 (worktree: skip reading HEAD when repairing worktrees, 2023-12-29), we have introduced a function "get_worktrees_internal" which allows us to get worktrees without reading head information. 
Create a new exposed function "get_worktrees_without_reading_head", then replace the "get_worktrees" in "builtin/refs" with the new created function. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- builtin/refs.c | 2 +- worktree.c | 5 +++++ worktree.h | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/builtin/refs.c b/builtin/refs.c index a29f1958347451..55ff5dae11bba6 100644 --- a/builtin/refs.c +++ b/builtin/refs.c @@ -88,7 +88,7 @@ static int cmd_refs_verify(int argc, const char **argv, const char *prefix, git_config(git_fsck_config, &fsck_refs_options); prepare_repo_settings(the_repository); - worktrees = get_worktrees(); + worktrees = get_worktrees_without_reading_head(); for (size_t i = 0; worktrees[i]; i++) ret |= refs_fsck(get_worktree_ref_store(worktrees[i]), &fsck_refs_options, worktrees[i]); diff --git a/worktree.c b/worktree.c index 248bbb39d43ae8..89b7d86cef1af1 100644 --- a/worktree.c +++ b/worktree.c @@ -175,6 +175,11 @@ struct worktree **get_worktrees(void) return get_worktrees_internal(0); } +struct worktree **get_worktrees_without_reading_head(void) +{ + return get_worktrees_internal(1); +} + const char *get_worktree_git_dir(const struct worktree *wt) { if (!wt) diff --git a/worktree.h b/worktree.h index 38145df80f4107..a305c7e2c7282a 100644 --- a/worktree.h +++ b/worktree.h @@ -30,6 +30,14 @@ struct worktree { */ struct worktree **get_worktrees(void); +/* + * Like `get_worktrees`, but does not read HEAD. Skip reading HEAD allows to + * get the worktree without worrying about failures pertaining to parsing + * the HEAD ref. This is useful in contexts where it is assumed that the + * refdb may not be in a consistent state. + */ +struct worktree **get_worktrees_without_reading_head(void); + /* * Returns 1 if linked worktrees exist, 0 otherwise. 
*/ From cfea2f2da8d418edd7984e5b5073825834099a27 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:06:24 +0800 Subject: [PATCH 033/775] packed-backend: check whether the "packed-refs" is regular file Although "git-fsck(1)" and "packed-backend.c" will check some consistency and correctness of "packed-refs" file, they never check the filetype of the "packed-refs". Let's verify that the "packed-refs" has the expected filetype, confirming it is created by "git pack-refs" command. We could use "open_nofollow" wrapper to open the raw "packed-refs" file. If the returned "fd" value is less than 0, we could check whether the "errno" is "ELOOP" to report an error to the user. And then we use "fstat" to check whether the "packed-refs" file is a regular file. Reuse "FSCK_MSG_BAD_REF_FILETYPE" fsck message id to report the error to the user if "packed-refs" is not a regular file. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 52 ++++++++++++++++++++++++++++++++++++---- t/t0602-reffiles-fsck.sh | 30 +++++++++++++++++++++++ 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index a7b6f74b6e35f8..1fba804a2aad07 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -4,6 +4,7 @@ #include "../git-compat-util.h" #include "../config.h" #include "../dir.h" +#include "../fsck.h" #include "../gettext.h" #include "../hash.h" #include "../hex.h" @@ -1748,15 +1749,58 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s return empty_ref_iterator_begin(); } -static int packed_fsck(struct ref_store *ref_store UNUSED, - struct fsck_options *o UNUSED, +static int packed_fsck(struct ref_store *ref_store, + struct fsck_options *o, struct worktree *wt) { + struct packed_ref_store *refs = packed_downcast(ref_store, + REF_STORE_READ, "fsck"); + struct stat st; + int ret = 0; + int fd = -1; if 
(!is_main_worktree(wt)) - return 0; + goto cleanup; - return 0; + if (o->verbose) + fprintf_ln(stderr, "Checking packed-refs file %s", refs->path); + + fd = open_nofollow(refs->path, O_RDONLY); + if (fd < 0) { + /* + * If the packed-refs file doesn't exist, there's nothing + * to check. + */ + if (errno == ENOENT) + goto cleanup; + + if (errno == ELOOP) { + struct fsck_ref_report report = { 0 }; + report.path = "packed-refs"; + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_REF_FILETYPE, + "not a regular file but a symlink"); + goto cleanup; + } + + ret = error_errno(_("unable to open '%s'"), refs->path); + goto cleanup; + } else if (fstat(fd, &st) < 0) { + ret = error_errno(_("unable to stat '%s'"), refs->path); + goto cleanup; + } else if (!S_ISREG(st.st_mode)) { + struct fsck_ref_report report = { 0 }; + report.path = "packed-refs"; + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_REF_FILETYPE, + "not a regular file"); + goto cleanup; + } + +cleanup: + if (fd >= 0) + close(fd); + return ret; } struct ref_storage_be refs_be_packed = { diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index cf7a202d0ded7c..68b7d4999e0b40 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -617,4 +617,34 @@ test_expect_success 'ref content checks should work with worktrees' ' ) ' +test_expect_success SYMLINKS 'the filetype of packed-refs should be checked' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit default && + git branch branch-1 && + git branch branch-2 && + git branch branch-3 && + git pack-refs --all && + + mv .git/packed-refs .git/packed-refs-back && + ln -sf packed-refs-back .git/packed-refs && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: packed-refs: badRefFiletype: not a regular file but a symlink + EOF + rm .git/packed-refs && + test_cmp expect err && + + mkdir .git/packed-refs && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: 
packed-refs: badRefFiletype: not a regular file + EOF + rm -r .git/packed-refs && + test_cmp expect err + ) +' + test_done From 515579756c8d23ea2f3c6386a9333ecbe84d8a08 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:06:40 +0800 Subject: [PATCH 034/775] packed-backend: check if header starts with "# pack-refs with: " We always write a space after "# pack-refs with:" but we don't align with this rule in the "create_snapshot" method where we would check whether header starts with "# pack-refs with:". It might seem that we should undoubtedly tighten this rule, however, we don't have any technical documentation about this and there is a possibility that we would break the compatibility for other third-party libraries. By investigating influential third-party libraries, we could conclude how these libraries handle the header of "packed-refs" file: 1. libgit2 is fine and always writes the space. It also expects the whitespace to exist. 2. JGit does not expect the header to have a trailing space, but expects the "peeled" capability to have a leading space, which is mostly equivalent because that capability is typically the first one we write. It always writes the space. 3. gitoxide expects the space to exist and writes it. 4. go-git doesn't create the header by default. As many third-party libraries expect a single space after "# pack-refs with:", if we forget to write the space after the colon, "create_snapshot" won't catch this. And we would break other re-implementations. So, we'd better tighten the rule by checking whether the header starts with "# pack-refs with: ".
Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 1fba804a2aad07..eaa8746f3ee54d 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -694,7 +694,7 @@ static struct snapshot *create_snapshot(struct packed_ref_store *refs) tmp = xmemdupz(snapshot->buf, eol - snapshot->buf); - if (!skip_prefix(tmp, "# pack-refs with:", (const char **)&p)) + if (!skip_prefix(tmp, "# pack-refs with: ", (const char **)&p)) die_invalid_line(refs->path, snapshot->buf, snapshot->eof - snapshot->buf); From c92e7e156e6b406e7555fb5df058d18758a0b3f0 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:06:49 +0800 Subject: [PATCH 035/775] packed-backend: add "packed-refs" header consistency check In "packed-backend.c::create_snapshot", if there is a header (the line which starts with '#'), we will check whether the line starts with "# pack-refs with: ". However, we need to consider other situations and discuss whether we need to add checks. 1. If the header does not exist, we should not report an error to the user. This is because in older Git versions, we never wrote a header in the "packed-refs" file. Also, we do allow no header in "packed-refs" at runtime. 2. If the header content does not start with "# pack-refs with: ", we should report an error just like what "create_snapshot" does. So, create a new fsck message "badPackedRefHeader(ERROR)" for this. 3. The header content may differ from the constant string "PACKED_REFS_HEADER". This is expected because we make it extensible intentionally and runtime "create_snapshot" won't complain about unknown traits. In order to align with the runtime behavior, there is no need to report this. As analyzed above, we only need to check case 2.
In order to do this, use "open_nofollow" function to get the file descriptor and then read the "packed-refs" file via "strbuf_read". Like what "create_snapshot" and other functions do, we could split the line by finding the next newline in the buffer. When we cannot find a newline, we could report an error. So, create a function "packed_fsck_ref_next_line" to find the next newline and if there is no such newline, use "packedRefEntryNotTerminated(ERROR)" to report an error to the user. Then, parse the first line to apply the checks. Update the test to exercise the code. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/fsck-msgids.txt | 8 ++++ fsck.h | 2 + refs/packed-backend.c | 73 +++++++++++++++++++++++++++++++++++ t/t0602-reffiles-fsck.sh | 52 +++++++++++++++++++++++++ 4 files changed, 135 insertions(+) diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt index b14bc44ca4791f..11906f90fd99cc 100644 --- a/Documentation/fsck-msgids.txt +++ b/Documentation/fsck-msgids.txt @@ -16,6 +16,10 @@ `badObjectSha1`:: (ERROR) An object has a bad sha1. +`badPackedRefHeader`:: + (ERROR) The "packed-refs" file contains an invalid + header. + `badParentSha1`:: (ERROR) A commit object has a bad parent sha1. @@ -176,6 +180,10 @@ `nullSha1`:: (WARN) Tree contains entries pointing to a null sha1. +`packedRefEntryNotTerminated`:: + (ERROR) The "packed-refs" file contains an entry that is + not terminated by a newline. + `refMissingNewline`:: (INFO) A loose ref that does not end with newline(LF). 
As valid implementations of Git never created such a loose ref diff --git a/fsck.h b/fsck.h index a44c231a5f1391..67e3c97bc019d8 100644 --- a/fsck.h +++ b/fsck.h @@ -30,6 +30,7 @@ enum fsck_msg_type { FUNC(BAD_EMAIL, ERROR) \ FUNC(BAD_NAME, ERROR) \ FUNC(BAD_OBJECT_SHA1, ERROR) \ + FUNC(BAD_PACKED_REF_HEADER, ERROR) \ FUNC(BAD_PARENT_SHA1, ERROR) \ FUNC(BAD_REF_CONTENT, ERROR) \ FUNC(BAD_REF_FILETYPE, ERROR) \ @@ -53,6 +54,7 @@ enum fsck_msg_type { FUNC(MISSING_TYPE, ERROR) \ FUNC(MISSING_TYPE_ENTRY, ERROR) \ FUNC(MULTIPLE_AUTHORS, ERROR) \ + FUNC(PACKED_REF_ENTRY_NOT_TERMINATED, ERROR) \ FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ diff --git a/refs/packed-backend.c b/refs/packed-backend.c index eaa8746f3ee54d..07154bccae8567 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1749,12 +1749,76 @@ static struct ref_iterator *packed_reflog_iterator_begin(struct ref_store *ref_s return empty_ref_iterator_begin(); } +static int packed_fsck_ref_next_line(struct fsck_options *o, + unsigned long line_number, const char *start, + const char *eof, const char **eol) +{ + int ret = 0; + + *eol = memchr(start, '\n', eof - start); + if (!*eol) { + struct strbuf packed_entry = STRBUF_INIT; + struct fsck_ref_report report = { 0 }; + + strbuf_addf(&packed_entry, "packed-refs line %lu", line_number); + report.path = packed_entry.buf; + ret = fsck_report_ref(o, &report, + FSCK_MSG_PACKED_REF_ENTRY_NOT_TERMINATED, + "'%.*s' is not terminated with a newline", + (int)(eof - start), start); + + /* + * There is no newline but we still want to parse it to the end of + * the buffer. 
+ */ + *eol = eof; + strbuf_release(&packed_entry); + } + + return ret; +} + +static int packed_fsck_ref_header(struct fsck_options *o, + const char *start, const char *eol) +{ + if (!starts_with(start, "# pack-refs with: ")) { + struct fsck_ref_report report = { 0 }; + report.path = "packed-refs.header"; + + return fsck_report_ref(o, &report, + FSCK_MSG_BAD_PACKED_REF_HEADER, + "'%.*s' does not start with '# pack-refs with: '", + (int)(eol - start), start); + } + + return 0; +} + +static int packed_fsck_ref_content(struct fsck_options *o, + const char *start, const char *eof) +{ + unsigned long line_number = 1; + const char *eol; + int ret = 0; + + ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol); + if (*start == '#') { + ret |= packed_fsck_ref_header(o, start, eol); + + start = eol + 1; + line_number++; + } + + return ret; +} + static int packed_fsck(struct ref_store *ref_store, struct fsck_options *o, struct worktree *wt) { struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ, "fsck"); + struct strbuf packed_ref_content = STRBUF_INIT; struct stat st; int ret = 0; int fd = -1; @@ -1797,9 +1861,18 @@ static int packed_fsck(struct ref_store *ref_store, goto cleanup; } + if (strbuf_read(&packed_ref_content, fd, 0) < 0) { + ret = error_errno(_("unable to read '%s'"), refs->path); + goto cleanup; + } + + ret = packed_fsck_ref_content(o, packed_ref_content.buf, + packed_ref_content.buf + packed_ref_content.len); + cleanup: if (fd >= 0) close(fd); + strbuf_release(&packed_ref_content); return ret; } diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index 68b7d4999e0b40..74d876984db35f 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -647,4 +647,56 @@ test_expect_success SYMLINKS 'the filetype of packed-refs should be checked' ' ) ' +test_expect_success 'packed-refs header should be checked' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit default && + + 
git refs verify 2>err && + test_must_be_empty err && + + for bad_header in "# pack-refs wit: peeled fully-peeled sorted " \ + "# pack-refs with traits: peeled fully-peeled sorted " \ + "# pack-refs with a: peeled fully-peeled" \ + "# pack-refs with:peeled fully-peeled sorted" + do + printf "%s\n" "$bad_header" >.git/packed-refs && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: packed-refs.header: badPackedRefHeader: '\''$bad_header'\'' does not start with '\''# pack-refs with: '\'' + EOF + rm .git/packed-refs && + test_cmp expect err || return 1 + done + ) +' + +test_expect_success 'packed-refs missing header should not be reported' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit default && + + printf "$(git rev-parse HEAD) refs/heads/main\n" >.git/packed-refs && + git refs verify 2>err && + test_must_be_empty err + ) +' + +test_expect_success 'packed-refs unknown traits should not be reported' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit default && + + printf "# pack-refs with: peeled fully-peeled sorted foo\n" >.git/packed-refs && + git refs verify 2>err && + test_must_be_empty err + ) +' + test_done From 5637d5542021294e81cf0d8344fe140368117296 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:07:00 +0800 Subject: [PATCH 036/775] packed-backend: check whether the refname contains NUL characters "packed-backend.c::next_record" will use "check_refname_format" to check the consistency of the refname. If it is not OK, the program will die. However, it is reported in [1], we cannot catch some corruption. But we already have the code path and we must miss out something. We use the following code to get the refname: strbuf_add(&iter->refname_buf, p, eol - p); iter->base.refname = iter->refname_buf.buf In the above code, `p` is the start pointer of the refname and `eol` is the next newline pointer. 
We calculate the length of the refname by subtracting the two pointers. Then we add the memory range between `p` and `eol` to get the refname. However, if there are some NUL characters in the memory range between `p` and `eol`, we will see the refname as a valid ref name as long as the memory range between `p` and first occurred NUL character is valid. In order to catch above corruption, create a new function "refname_contains_nul" by searching the first NUL character. If it is not at the end of the string, there must be some NUL characters in the refname. Use this function in "next_record" function to die the program if "refname_contains_nul" returns true. [1] https://lore.kernel.org/git/6cfee0e4-3285-4f18-91ff-d097da9de737@rd10.de/ Reported-by: R. Diez Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 07154bccae8567..9a90c52f70b56a 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -494,6 +494,21 @@ static void verify_buffer_safe(struct snapshot *snapshot) last_line, eof - last_line); } +/* + * When parsing the "packed-refs" file, we will parse it line by line. + * Because we know the start pointer of the refname and the next + * newline pointer, we could calculate the length of the refname by + * subtracting the two pointers. However, there is a corner case where + * the refname contains corrupted embedded NUL characters. And + * `check_refname_format()` will not catch this when the truncated + * refname is still a valid refname. To prevent this, we need to check + * whether the refname contains the NUL characters. 
+ */ +static int refname_contains_nul(struct strbuf *refname) +{ + return !!memchr(refname->buf, '\0', refname->len); +} + #define SMALL_FILE_SIZE (32*1024) /* @@ -895,6 +910,9 @@ static int next_record(struct packed_ref_iterator *iter) strbuf_add(&iter->refname_buf, p, eol - p); iter->base.refname = iter->refname_buf.buf; + if (refname_contains_nul(&iter->refname_buf)) + die("packed refname contains embedded NULL: %s", iter->base.refname); + if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) { if (!refname_is_safe(iter->base.refname)) die("packed refname is dangerous: %s", From e6ba4c07b85a0a8fee84b6ac7ab414d47a5351f2 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:07:17 +0800 Subject: [PATCH 037/775] packed-backend: add "packed-refs" entry consistency check "packed-backend.c::next_record" will parse the ref entry to check the consistency. This function has already checked the following things: 1. Parse the main line of the ref entry to inspect whether the oid is not correct. Then, check whether the next character is oid. Then check the refname. 2. If the next line starts with '^', it would continue to parse the peeled oid and check whether the last character is '\n'. As we decide to implement the ref consistency check for "packed-refs", let's port these two checks and update the test to exercise the code. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/fsck-msgids.txt | 3 + fsck.h | 1 + refs/packed-backend.c | 122 +++++++++++++++++++++++++++++++++- t/t0602-reffiles-fsck.sh | 44 ++++++++++++ 4 files changed, 169 insertions(+), 1 deletion(-) diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt index 11906f90fd99cc..02a7bf0503286b 100644 --- a/Documentation/fsck-msgids.txt +++ b/Documentation/fsck-msgids.txt @@ -16,6 +16,9 @@ `badObjectSha1`:: (ERROR) An object has a bad sha1. 
+`badPackedRefEntry`:: + (ERROR) The "packed-refs" file contains an invalid entry. + `badPackedRefHeader`:: (ERROR) The "packed-refs" file contains an invalid header. diff --git a/fsck.h b/fsck.h index 67e3c97bc019d8..14d70f6653f1c3 100644 --- a/fsck.h +++ b/fsck.h @@ -30,6 +30,7 @@ enum fsck_msg_type { FUNC(BAD_EMAIL, ERROR) \ FUNC(BAD_NAME, ERROR) \ FUNC(BAD_OBJECT_SHA1, ERROR) \ + FUNC(BAD_PACKED_REF_ENTRY, ERROR) \ FUNC(BAD_PACKED_REF_HEADER, ERROR) \ FUNC(BAD_PARENT_SHA1, ERROR) \ FUNC(BAD_REF_CONTENT, ERROR) \ diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 9a90c52f70b56a..ef20300fd324c0 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1812,9 +1812,114 @@ static int packed_fsck_ref_header(struct fsck_options *o, return 0; } +static int packed_fsck_ref_peeled_line(struct fsck_options *o, + struct ref_store *ref_store, + unsigned long line_number, + const char *start, const char *eol) +{ + struct strbuf packed_entry = STRBUF_INIT; + struct fsck_ref_report report = { 0 }; + struct object_id peeled; + const char *p; + int ret = 0; + + /* + * Skip the '^' and parse the peeled oid. 
+ */ + start++; + if (parse_oid_hex_algop(start, &peeled, &p, ref_store->repo->hash_algo)) { + strbuf_addf(&packed_entry, "packed-refs line %lu", line_number); + report.path = packed_entry.buf; + + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_PACKED_REF_ENTRY, + "'%.*s' has invalid peeled oid", + (int)(eol - start), start); + goto cleanup; + } + + if (p != eol) { + strbuf_addf(&packed_entry, "packed-refs line %lu", line_number); + report.path = packed_entry.buf; + + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_PACKED_REF_ENTRY, + "has trailing garbage after peeled oid '%.*s'", + (int)(eol - p), p); + goto cleanup; + } + +cleanup: + strbuf_release(&packed_entry); + return ret; +} + +static int packed_fsck_ref_main_line(struct fsck_options *o, + struct ref_store *ref_store, + unsigned long line_number, + struct strbuf *refname, + const char *start, const char *eol) +{ + struct strbuf packed_entry = STRBUF_INIT; + struct fsck_ref_report report = { 0 }; + struct object_id oid; + const char *p; + int ret = 0; + + if (parse_oid_hex_algop(start, &oid, &p, ref_store->repo->hash_algo)) { + strbuf_addf(&packed_entry, "packed-refs line %lu", line_number); + report.path = packed_entry.buf; + + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_PACKED_REF_ENTRY, + "'%.*s' has invalid oid", + (int)(eol - start), start); + goto cleanup; + } + + if (p == eol || !isspace(*p)) { + strbuf_addf(&packed_entry, "packed-refs line %lu", line_number); + report.path = packed_entry.buf; + + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_PACKED_REF_ENTRY, + "has no space after oid '%s' but with '%.*s'", + oid_to_hex(&oid), (int)(eol - p), p); + goto cleanup; + } + + p++; + strbuf_reset(refname); + strbuf_add(refname, p, eol - p); + if (refname_contains_nul(refname)) { + strbuf_addf(&packed_entry, "packed-refs line %lu", line_number); + report.path = packed_entry.buf; + + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_PACKED_REF_ENTRY, + "refname '%s' contains NULL binaries", + 
refname->buf); + } + + if (check_refname_format(refname->buf, 0)) { + strbuf_addf(&packed_entry, "packed-refs line %lu", line_number); + report.path = packed_entry.buf; + + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_REF_NAME, + "has bad refname '%s'", refname->buf); + } + +cleanup: + strbuf_release(&packed_entry); + return ret; +} + static int packed_fsck_ref_content(struct fsck_options *o, + struct ref_store *ref_store, const char *start, const char *eof) { + struct strbuf refname = STRBUF_INIT; unsigned long line_number = 1; const char *eol; int ret = 0; @@ -1827,6 +1932,21 @@ static int packed_fsck_ref_content(struct fsck_options *o, line_number++; } + while (start < eof) { + ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol); + ret |= packed_fsck_ref_main_line(o, ref_store, line_number, &refname, start, eol); + start = eol + 1; + line_number++; + if (start < eof && *start == '^') { + ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol); + ret |= packed_fsck_ref_peeled_line(o, ref_store, line_number, + start, eol); + start = eol + 1; + line_number++; + } + } + + strbuf_release(&refname); return ret; } @@ -1884,7 +2004,7 @@ static int packed_fsck(struct ref_store *ref_store, goto cleanup; } - ret = packed_fsck_ref_content(o, packed_ref_content.buf, + ret = packed_fsck_ref_content(o, ref_store, packed_ref_content.buf, packed_ref_content.buf + packed_ref_content.len); cleanup: diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index 74d876984db35f..a88c792ce1a312 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -699,4 +699,48 @@ test_expect_success 'packed-refs unknown traits should not be reported' ' ) ' +test_expect_success 'packed-refs content should be checked' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit default && + git branch branch-1 && + git branch branch-2 && + git tag -a annotated-tag-1 -m tag-1 && + git tag -a annotated-tag-2 -m tag-2 && 
+ + branch_1_oid=$(git rev-parse branch-1) && + branch_2_oid=$(git rev-parse branch-2) && + tag_1_oid=$(git rev-parse annotated-tag-1) && + tag_2_oid=$(git rev-parse annotated-tag-2) && + tag_1_peeled_oid=$(git rev-parse annotated-tag-1^{}) && + tag_2_peeled_oid=$(git rev-parse annotated-tag-2^{}) && + short_oid=$(printf "%s" $tag_1_peeled_oid | cut -c 1-4) && + + cat >.git/packed-refs <<-EOF && + # pack-refs with: peeled fully-peeled sorted + $short_oid refs/heads/branch-1 + ${branch_1_oid}x + $branch_2_oid refs/heads/bad-branch + $branch_2_oid refs/heads/branch. + $tag_1_oid refs/tags/annotated-tag-3 + ^$short_oid + $tag_2_oid refs/tags/annotated-tag-4. + ^$tag_2_peeled_oid garbage + EOF + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: packed-refs line 2: badPackedRefEntry: '\''$short_oid refs/heads/branch-1'\'' has invalid oid + error: packed-refs line 3: badPackedRefEntry: has no space after oid '\''$branch_1_oid'\'' but with '\''x'\'' + error: packed-refs line 4: badRefName: has bad refname '\'' refs/heads/bad-branch'\'' + error: packed-refs line 5: badRefName: has bad refname '\''refs/heads/branch.'\'' + error: packed-refs line 7: badPackedRefEntry: '\''$short_oid'\'' has invalid peeled oid + error: packed-refs line 8: badRefName: has bad refname '\''refs/tags/annotated-tag-4.'\'' + error: packed-refs line 9: badPackedRefEntry: has trailing garbage after peeled oid '\'' garbage'\'' + EOF + test_cmp expect err + ) +' + test_done From e1c9548eae406a7aa7274685f8bdbc353827bf16 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:07:40 +0800 Subject: [PATCH 038/775] packed-backend: check whether the "packed-refs" is sorted When there is a "sorted" trait in the header of the "packed-refs" file, it means that each entry is sorted increasingly by comparing the refname. We should add checks to verify whether the "packed-refs" is sorted in this case. 
Update the "packed_fsck_ref_header" to know whether there is a "sorted" trait in the header. It may seem that we could record all refnames during the parsing process and then compare later. However, this is not a good design due to the following reasons: 1. Because we need to store the state across the whole checking lifetime, we would consume a lot of memory if there are many entries in the "packed-refs" file. 2. We cannot reuse the existing compare function "cmp_packed_ref_records" which would cause repetition. Because "cmp_packed_ref_records" needs an extra parameter "struct snapshot", extract the common part into a new function "cmp_packed_refname" to reuse this function to compare. Then, create a new function "packed_fsck_ref_sorted" to parse the file again and use the new fsck message "packedRefUnsorted(ERROR)" to report to the user if the file is not sorted. Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/fsck-msgids.txt | 3 + fsck.h | 1 + refs/packed-backend.c | 116 +++++++++++++++++++++++++++++----- t/t0602-reffiles-fsck.sh | 87 +++++++++++++++++++++++++ 4 files changed, 191 insertions(+), 16 deletions(-) diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt index 02a7bf0503286b..9601fff22854b6 100644 --- a/Documentation/fsck-msgids.txt +++ b/Documentation/fsck-msgids.txt @@ -187,6 +187,9 @@ (ERROR) The "packed-refs" file contains an entry that is not terminated by a newline. +`packedRefUnsorted`:: + (ERROR) The "packed-refs" file is not sorted. + `refMissingNewline`:: (INFO) A loose ref that does not end with newline(LF).
As valid implementations of Git never created such a loose ref diff --git a/fsck.h b/fsck.h index 14d70f6653f1c3..19f3cb27734bae 100644 --- a/fsck.h +++ b/fsck.h @@ -56,6 +56,7 @@ enum fsck_msg_type { FUNC(MISSING_TYPE_ENTRY, ERROR) \ FUNC(MULTIPLE_AUTHORS, ERROR) \ FUNC(PACKED_REF_ENTRY_NOT_TERMINATED, ERROR) \ + FUNC(PACKED_REF_UNSORTED, ERROR) \ FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ diff --git a/refs/packed-backend.c b/refs/packed-backend.c index ef20300fd324c0..813e5020e4de80 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -300,14 +300,9 @@ struct snapshot_record { size_t len; }; -static int cmp_packed_ref_records(const void *v1, const void *v2, - void *cb_data) -{ - const struct snapshot *snapshot = cb_data; - const struct snapshot_record *e1 = v1, *e2 = v2; - const char *r1 = e1->start + snapshot_hexsz(snapshot) + 1; - const char *r2 = e2->start + snapshot_hexsz(snapshot) + 1; +static int cmp_packed_refname(const char *r1, const char *r2) +{ while (1) { if (*r1 == '\n') return *r2 == '\n' ? 0 : -1; @@ -322,6 +317,17 @@ static int cmp_packed_ref_records(const void *v1, const void *v2, } } +static int cmp_packed_ref_records(const void *v1, const void *v2, + void *cb_data) +{ + const struct snapshot *snapshot = cb_data; + const struct snapshot_record *e1 = v1, *e2 = v2; + const char *r1 = e1->start + snapshot_hexsz(snapshot) + 1; + const char *r2 = e2->start + snapshot_hexsz(snapshot) + 1; + + return cmp_packed_refname(r1, r2); +} + /* * Compare a snapshot record at `rec` to the specified NUL-terminated * refname. 
@@ -1797,19 +1803,33 @@ static int packed_fsck_ref_next_line(struct fsck_options *o, } static int packed_fsck_ref_header(struct fsck_options *o, - const char *start, const char *eol) + const char *start, const char *eol, + unsigned int *sorted) { - if (!starts_with(start, "# pack-refs with: ")) { + struct string_list traits = STRING_LIST_INIT_NODUP; + char *tmp_line; + int ret = 0; + char *p; + + tmp_line = xmemdupz(start, eol - start); + if (!skip_prefix(tmp_line, "# pack-refs with: ", (const char **)&p)) { struct fsck_ref_report report = { 0 }; report.path = "packed-refs.header"; - return fsck_report_ref(o, &report, - FSCK_MSG_BAD_PACKED_REF_HEADER, - "'%.*s' does not start with '# pack-refs with: '", - (int)(eol - start), start); + ret = fsck_report_ref(o, &report, + FSCK_MSG_BAD_PACKED_REF_HEADER, + "'%.*s' does not start with '# pack-refs with: '", + (int)(eol - start), start); + goto cleanup; } - return 0; + string_list_split_in_place(&traits, p, " ", -1); + *sorted = unsorted_string_list_has_string(&traits, "sorted"); + +cleanup: + free(tmp_line); + string_list_clear(&traits, 0); + return ret; } static int packed_fsck_ref_peeled_line(struct fsck_options *o, @@ -1915,8 +1935,68 @@ static int packed_fsck_ref_main_line(struct fsck_options *o, return ret; } +static int packed_fsck_ref_sorted(struct fsck_options *o, + struct ref_store *ref_store, + const char *start, const char *eof) +{ + size_t hexsz = ref_store->repo->hash_algo->hexsz; + struct strbuf packed_entry = STRBUF_INIT; + struct fsck_ref_report report = { 0 }; + struct strbuf refname1 = STRBUF_INIT; + struct strbuf refname2 = STRBUF_INIT; + unsigned long line_number = 1; + const char *former = NULL; + const char *current; + const char *eol; + int ret = 0; + + if (*start == '#') { + eol = memchr(start, '\n', eof - start); + start = eol + 1; + line_number++; + } + + for (; start < eof; line_number++, start = eol + 1) { + eol = memchr(start, '\n', eof - start); + + if (*start == '^') + continue; + + if 
(!former) { + former = start + hexsz + 1; + continue; + } + + current = start + hexsz + 1; + if (cmp_packed_refname(former, current) >= 0) { + const char *err_fmt = + "refname '%s' is less than previous refname '%s'"; + + eol = memchr(former, '\n', eof - former); + strbuf_add(&refname1, former, eol - former); + eol = memchr(current, '\n', eof - current); + strbuf_add(&refname2, current, eol - current); + + strbuf_addf(&packed_entry, "packed-refs line %lu", line_number); + report.path = packed_entry.buf; + ret = fsck_report_ref(o, &report, + FSCK_MSG_PACKED_REF_UNSORTED, + err_fmt, refname2.buf, refname1.buf); + goto cleanup; + } + former = current; + } + +cleanup: + strbuf_release(&packed_entry); + strbuf_release(&refname1); + strbuf_release(&refname2); + return ret; +} + static int packed_fsck_ref_content(struct fsck_options *o, struct ref_store *ref_store, + unsigned int *sorted, const char *start, const char *eof) { struct strbuf refname = STRBUF_INIT; @@ -1926,7 +2006,7 @@ static int packed_fsck_ref_content(struct fsck_options *o, ret |= packed_fsck_ref_next_line(o, line_number, start, eof, &eol); if (*start == '#') { - ret |= packed_fsck_ref_header(o, start, eol); + ret |= packed_fsck_ref_header(o, start, eol, sorted); start = eol + 1; line_number++; @@ -1957,6 +2037,7 @@ static int packed_fsck(struct ref_store *ref_store, struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ, "fsck"); struct strbuf packed_ref_content = STRBUF_INIT; + unsigned int sorted = 0; struct stat st; int ret = 0; int fd = -1; @@ -2004,8 +2085,11 @@ static int packed_fsck(struct ref_store *ref_store, goto cleanup; } - ret = packed_fsck_ref_content(o, ref_store, packed_ref_content.buf, + ret = packed_fsck_ref_content(o, ref_store, &sorted, packed_ref_content.buf, packed_ref_content.buf + packed_ref_content.len); + if (!ret && sorted) + ret = packed_fsck_ref_sorted(o, ref_store, packed_ref_content.buf, + packed_ref_content.buf + packed_ref_content.len); cleanup: if (fd 
>= 0) diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index a88c792ce1a312..767e2bd4a0640a 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -743,4 +743,91 @@ test_expect_success 'packed-refs content should be checked' ' ) ' +test_expect_success 'packed-ref with sorted trait should be checked' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit default && + git branch branch-1 && + git branch branch-2 && + git tag -a annotated-tag-1 -m tag-1 && + branch_1_oid=$(git rev-parse branch-1) && + branch_2_oid=$(git rev-parse branch-2) && + tag_1_oid=$(git rev-parse annotated-tag-1) && + tag_1_peeled_oid=$(git rev-parse annotated-tag-1^{}) && + refname1="refs/heads/main" && + refname2="refs/heads/foo" && + refname3="refs/tags/foo" && + + cat >.git/packed-refs <<-EOF && + # pack-refs with: peeled fully-peeled sorted + EOF + git refs verify 2>err && + rm .git/packed-refs && + test_must_be_empty err && + + cat >.git/packed-refs <<-EOF && + # pack-refs with: peeled fully-peeled sorted + $branch_2_oid $refname1 + EOF + git refs verify 2>err && + rm .git/packed-refs && + test_must_be_empty err && + + cat >.git/packed-refs <<-EOF && + # pack-refs with: peeled fully-peeled sorted + $branch_2_oid $refname1 + $branch_1_oid $refname2 + $tag_1_oid $refname3 + EOF + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: packed-refs line 3: packedRefUnsorted: refname '\''$refname2'\'' is less than previous refname '\''$refname1'\'' + EOF + rm .git/packed-refs && + test_cmp expect err && + + cat >.git/packed-refs <<-EOF && + # pack-refs with: peeled fully-peeled sorted + $tag_1_oid $refname3 + ^$tag_1_peeled_oid + $branch_2_oid $refname2 + EOF + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: packed-refs line 4: packedRefUnsorted: refname '\''$refname2'\'' is less than previous refname '\''$refname3'\'' + EOF + rm .git/packed-refs && + test_cmp expect err + ) +' + 
+test_expect_success 'packed-ref without sorted trait should not be checked' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit default && + git branch branch-1 && + git branch branch-2 && + git tag -a annotated-tag-1 -m tag-1 && + branch_1_oid=$(git rev-parse branch-1) && + branch_2_oid=$(git rev-parse branch-2) && + tag_1_oid=$(git rev-parse annotated-tag-1) && + tag_1_peeled_oid=$(git rev-parse annotated-tag-1^{}) && + refname1="refs/heads/main" && + refname2="refs/heads/foo" && + refname3="refs/tags/foo" && + + cat >.git/packed-refs <<-EOF && + # pack-refs with: peeled fully-peeled + $branch_2_oid $refname1 + $branch_1_oid $refname2 + EOF + git refs verify 2>err && + test_must_be_empty err + ) +' + test_done From c1cf918d3adbe832fdaf278a23151502be255bb2 Mon Sep 17 00:00:00 2001 From: shejialuo Date: Fri, 28 Feb 2025 00:07:48 +0800 Subject: [PATCH 039/775] builtin/fsck: add `git refs verify` child process At now, we have already implemented the ref consistency checks for both "files-backend" and "packed-backend". Although we would check some redundant things, it won't cause trouble. So, let's integrate it into the "git-fsck(1)" command to get feedback from the users. And also by calling "git refs verify" in "git-fsck(1)", we make sure that the new added checks don't break. Introduce a new function "fsck_refs" that initializes and runs a child process to execute the "git refs verify" command. In order to provide the user interface create a progress which makes the total task be 1. It's hard to know how many loose refs we will check now. We might improve this later. Then, introduce the option to allow the user to disable checking ref database consistency. Put this function in the very first execution sequence of "git-fsck(1)" due to that we don't want the existing code of "git-fsck(1)" which would implicitly check the consistency of refs to die the program. Last, update the test to exercise the code. 
Mentored-by: Patrick Steinhardt Mentored-by: Karthik Nayak Signed-off-by: shejialuo Signed-off-by: Junio C Hamano --- Documentation/git-fsck.txt | 7 ++++++- builtin/fsck.c | 33 +++++++++++++++++++++++++++++++- t/t0602-reffiles-fsck.sh | 39 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/Documentation/git-fsck.txt b/Documentation/git-fsck.txt index 5b82e4605c2e91..5e71a29c3b02d9 100644 --- a/Documentation/git-fsck.txt +++ b/Documentation/git-fsck.txt @@ -12,7 +12,7 @@ SYNOPSIS 'git fsck' [--tags] [--root] [--unreachable] [--cache] [--no-reflogs] [--[no-]full] [--strict] [--verbose] [--lost-found] [--[no-]dangling] [--[no-]progress] [--connectivity-only] - [--[no-]name-objects] [...] + [--[no-]name-objects] [--[no-]references] [...] DESCRIPTION ----------- @@ -104,6 +104,11 @@ care about this output and want to speed it up further. progress status even if the standard error stream is not directed to a terminal. +--[no-]references:: + Control whether to check the references database consistency + via 'git refs verify'. See linkgit:git-refs[1] for details. + The default is to check the references database. 
+ CONFIGURATION ------------- diff --git a/builtin/fsck.c b/builtin/fsck.c index 0196c54eb68ee5..620efe8368e148 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -50,6 +50,7 @@ static int verbose; static int show_progress = -1; static int show_dangling = 1; static int name_objects; +static int check_references = 1; #define ERROR_OBJECT 01 #define ERROR_REACHABLE 02 #define ERROR_PACK 04 @@ -902,11 +903,37 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress) return res; } +static void fsck_refs(struct repository *r) +{ + struct child_process refs_verify = CHILD_PROCESS_INIT; + struct progress *progress = NULL; + + if (show_progress) + progress = start_progress(r, _("Checking ref database"), 1); + + if (verbose) + fprintf_ln(stderr, _("Checking ref database")); + + child_process_init(&refs_verify); + refs_verify.git_cmd = 1; + strvec_pushl(&refs_verify.args, "refs", "verify", NULL); + if (verbose) + strvec_push(&refs_verify.args, "--verbose"); + if (check_strict) + strvec_push(&refs_verify.args, "--strict"); + + if (run_command(&refs_verify)) + errors_found |= ERROR_REFS; + + display_progress(progress, 1); + stop_progress(&progress); +} + static char const * const fsck_usage[] = { N_("git fsck [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]\n" " [--[no-]full] [--strict] [--verbose] [--lost-found]\n" " [--[no-]dangling] [--[no-]progress] [--connectivity-only]\n" - " [--[no-]name-objects] [...]"), + " [--[no-]name-objects] [--[no-]references] [...]"), NULL }; @@ -925,6 +952,7 @@ static struct option fsck_opts[] = { N_("write dangling objects in .git/lost-found")), OPT_BOOL(0, "progress", &show_progress, N_("show progress")), OPT_BOOL(0, "name-objects", &name_objects, N_("show verbose names for reachable objects")), + OPT_BOOL(0, "references", &check_references, N_("check reference database consistency")), OPT_END(), }; @@ -967,6 +995,9 @@ int cmd_fsck(int argc, git_config(git_fsck_config, &fsck_obj_options); 
prepare_repo_settings(the_repository); + if (check_references) + fsck_refs(the_repository); + if (connectivity_only) { for_each_loose_object(mark_loose_for_connectivity, NULL, 0); for_each_packed_object(the_repository, diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index 767e2bd4a0640a..9d1dc2144c4b72 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -830,4 +830,43 @@ test_expect_success 'packed-ref without sorted trait should not be checked' ' ) ' +test_expect_success '--[no-]references option should apply to fsck' ' + test_when_finished "rm -rf repo" && + git init repo && + branch_dir_prefix=.git/refs/heads && + ( + cd repo && + test_commit default && + for trailing_content in " garbage" " more garbage" + do + printf "%s" "$(git rev-parse HEAD)$trailing_content" >$branch_dir_prefix/branch-garbage && + git fsck 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\''$trailing_content'\'' + EOF + rm $branch_dir_prefix/branch-garbage && + test_cmp expect err || return 1 + done && + + for trailing_content in " garbage" " more garbage" + do + printf "%s" "$(git rev-parse HEAD)$trailing_content" >$branch_dir_prefix/branch-garbage && + git fsck --references 2>err && + cat >expect <<-EOF && + warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\''$trailing_content'\'' + EOF + rm $branch_dir_prefix/branch-garbage && + test_cmp expect err || return 1 + done && + + for trailing_content in " garbage" " more garbage" + do + printf "%s" "$(git rev-parse HEAD)$trailing_content" >$branch_dir_prefix/branch-garbage && + git fsck --no-references 2>err && + rm $branch_dir_prefix/branch-garbage && + test_must_be_empty err || return 1 + done + ) +' + test_done From 7c67d2a07055150b76e81efa8850221d52705305 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 28 Feb 2025 15:33:43 -0600 Subject: [PATCH 040/775] diff: return diff_filepair from diff queue 
helpers The `diff_addremove()` and `diff_change()` functions set up and queue diffs, but do not return the `diff_filepair` added to the queue. In a subsequent commit, modifications to `diff_filepair` need to occur in certain cases after being queued. Since the existing `diff_addremove()` and `diff_change()` are also used for callbacks in `diff_options` as types `add_remove_fn_t` and `change_fn_t`, modifying the existing function signatures requires further changes. The diff options for pruning use `file_add_remove()` and `file_change()` where file pairs do not even get queued. Thus, separate functions are implemented instead. Split out the queuing operations into `diff_queue_addremove()` and `diff_queue_change()` which also return a handle to the queued `diff_filepair`. Both `diff_addremove()` and `diff_change()` are reimplemented as thin wrappers around the new functions. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- diff.c | 70 +++++++++++++++++++++++++++++++++++++++++----------------- diff.h | 25 +++++++++++++++++++++ 2 files changed, 75 insertions(+), 20 deletions(-) diff --git a/diff.c b/diff.c index c89c15d98e0e29..4ddfe523fe9b36 100644 --- a/diff.c +++ b/diff.c @@ -7161,16 +7161,19 @@ void compute_diffstat(struct diff_options *options, options->found_changes = !!diffstat->nr; } -void diff_addremove(struct diff_options *options, - int addremove, unsigned mode, - const struct object_id *oid, - int oid_valid, - const char *concatpath, unsigned dirty_submodule) +struct diff_filepair *diff_queue_addremove(struct diff_queue_struct *queue, + struct diff_options *options, + int addremove, unsigned mode, + const struct object_id *oid, + int oid_valid, + const char *concatpath, + unsigned dirty_submodule) { struct diff_filespec *one, *two; + struct diff_filepair *pair; if (S_ISGITLINK(mode) && is_submodule_ignored(concatpath, options)) - return; + return NULL; /* This may look odd, but it is a preparation for * feeding "there are unchanged files which 
should @@ -7190,7 +7193,7 @@ void diff_addremove(struct diff_options *options, if (options->prefix && strncmp(concatpath, options->prefix, options->prefix_length)) - return; + return NULL; one = alloc_filespec(concatpath); two = alloc_filespec(concatpath); @@ -7202,25 +7205,29 @@ void diff_addremove(struct diff_options *options, two->dirty_submodule = dirty_submodule; } - diff_queue(&diff_queued_diff, one, two); + pair = diff_queue(queue, one, two); if (!options->flags.diff_from_contents) options->flags.has_changes = 1; + + return pair; } -void diff_change(struct diff_options *options, - unsigned old_mode, unsigned new_mode, - const struct object_id *old_oid, - const struct object_id *new_oid, - int old_oid_valid, int new_oid_valid, - const char *concatpath, - unsigned old_dirty_submodule, unsigned new_dirty_submodule) +struct diff_filepair *diff_queue_change(struct diff_queue_struct *queue, + struct diff_options *options, + unsigned old_mode, unsigned new_mode, + const struct object_id *old_oid, + const struct object_id *new_oid, + int old_oid_valid, int new_oid_valid, + const char *concatpath, + unsigned old_dirty_submodule, + unsigned new_dirty_submodule) { struct diff_filespec *one, *two; struct diff_filepair *p; if (S_ISGITLINK(old_mode) && S_ISGITLINK(new_mode) && is_submodule_ignored(concatpath, options)) - return; + return NULL; if (options->flags.reverse_diff) { SWAP(old_mode, new_mode); @@ -7231,7 +7238,7 @@ void diff_change(struct diff_options *options, if (options->prefix && strncmp(concatpath, options->prefix, options->prefix_length)) - return; + return NULL; one = alloc_filespec(concatpath); two = alloc_filespec(concatpath); @@ -7239,19 +7246,42 @@ void diff_change(struct diff_options *options, fill_filespec(two, new_oid, new_oid_valid, new_mode); one->dirty_submodule = old_dirty_submodule; two->dirty_submodule = new_dirty_submodule; - p = diff_queue(&diff_queued_diff, one, two); + p = diff_queue(queue, one, two); if 
(options->flags.diff_from_contents) - return; + return p; if (options->flags.quick && options->skip_stat_unmatch && !diff_filespec_check_stat_unmatch(options->repo, p)) { diff_free_filespec_data(p->one); diff_free_filespec_data(p->two); - return; + return p; } options->flags.has_changes = 1; + + return p; +} + +void diff_addremove(struct diff_options *options, int addremove, unsigned mode, + const struct object_id *oid, int oid_valid, + const char *concatpath, unsigned dirty_submodule) +{ + diff_queue_addremove(&diff_queued_diff, options, addremove, mode, oid, + oid_valid, concatpath, dirty_submodule); +} + +void diff_change(struct diff_options *options, + unsigned old_mode, unsigned new_mode, + const struct object_id *old_oid, + const struct object_id *new_oid, + int old_oid_valid, int new_oid_valid, + const char *concatpath, + unsigned old_dirty_submodule, unsigned new_dirty_submodule) +{ + diff_queue_change(&diff_queued_diff, options, old_mode, new_mode, + old_oid, new_oid, old_oid_valid, new_oid_valid, + concatpath, old_dirty_submodule, new_dirty_submodule); } struct diff_filepair *diff_unmerge(struct diff_options *options, const char *path) diff --git a/diff.h b/diff.h index 0a566f5531ac42..63afa17e84a1f7 100644 --- a/diff.h +++ b/diff.h @@ -508,6 +508,31 @@ void diff_set_default_prefix(struct diff_options *options); int diff_can_quit_early(struct diff_options *); +/* + * Stages changes in the provided diff queue for file additions and deletions. + * If a file pair gets queued, it is returned. + */ +struct diff_filepair *diff_queue_addremove(struct diff_queue_struct *queue, + struct diff_options *, + int addremove, unsigned mode, + const struct object_id *oid, + int oid_valid, const char *fullpath, + unsigned dirty_submodule); + +/* + * Stages changes in the provided diff queue for file modifications. + * If a file pair gets queued, it is returned. 
+ */ +struct diff_filepair *diff_queue_change(struct diff_queue_struct *queue, + struct diff_options *, + unsigned mode1, unsigned mode2, + const struct object_id *old_oid, + const struct object_id *new_oid, + int old_oid_valid, int new_oid_valid, + const char *fullpath, + unsigned dirty_submodule1, + unsigned dirty_submodule2); + void diff_addremove(struct diff_options *, int addremove, unsigned mode, From c8a8e04099a99d6eafbe6e9bb1db3b50996836f8 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 28 Feb 2025 15:33:44 -0600 Subject: [PATCH 041/775] diff: add option to skip resolving diff statuses By default, `diffcore_std()` resolves the statuses for queued diff file pairs by calling `diff_resolve_rename_copy()`. If status information is already manually set, invoking `diffcore_std()` may change the status value. Introduce the `skip_resolving_statuses` diff option that prevents `diffcore_std()` from resolving file pair statuses when enabled. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- diff.c | 2 +- diff.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/diff.c b/diff.c index 4ddfe523fe9b36..08f5e00a2cc921 100644 --- a/diff.c +++ b/diff.c @@ -7085,7 +7085,7 @@ void diffcore_std(struct diff_options *options) diffcore_order(options->orderfile); if (options->rotate_to) diffcore_rotate(options); - if (!options->found_follow) + if (!options->found_follow && !options->skip_resolving_statuses) /* See try_to_follow_renames() in tree-diff.c */ diff_resolve_rename_copy(); diffcore_apply_filter(options); diff --git a/diff.h b/diff.h index 63afa17e84a1f7..fc791ee2ccb373 100644 --- a/diff.h +++ b/diff.h @@ -353,6 +353,14 @@ struct diff_options { /* to support internal diff recursion by --follow hack*/ int found_follow; + /* + * By default, diffcore_std() resolves the statuses for queued diff file + * pairs by calling diff_resolve_rename_copy(). 
If status information + * has already been manually set, this option prevents diffcore_std() + * from resetting statuses. + */ + int skip_resolving_statuses; + /* Callback which allows tweaking the options in diff_setup_done(). */ void (*set_default)(struct diff_options *); From 5bd10b2adcf3a080169574dd08c5529de81eecb1 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 28 Feb 2025 15:33:45 -0600 Subject: [PATCH 042/775] builtin: introduce diff-pairs command Through git-diff(1), a single diff can be generated from a pair of blob revisions directly. Unfortunately, there is not a mechanism to compute batches of specific file pair diffs in a single process. Such a feature is particularly useful on the server-side where diffing between a large set of changes is not feasible all at once due to timeout concerns. To facilitate this, introduce git-diff-pairs(1) which acts as a backend passing its NUL-terminated raw diff format input from stdin through diff machinery to produce various forms of output such as patch or raw. The raw format was originally designed as an interchange format and represents the contents of the diff_queued_diff list making it possible to break the diff pipeline into separate stages. For example, git-diff-tree(1) can be used as a frontend to compute file pairs to queue and feed its raw output to git-diff-pairs(1) to compute patches. With this, batches of diffs can be progressively generated without having to recompute renames or retrieve object context. Something like the following: git diff-tree -r -z -M $old $new | git diff-pairs -p -z should generate the same output as `git diff-tree -p -M`. Furthermore, each line of raw diff formatted input can also be individually fed to a separate git-diff-pairs(1) process and still produce the same output. 
Based-on-patch-by: Jeff King Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- .gitignore | 1 + Documentation/git-diff-pairs.adoc | 56 +++++++++ Documentation/meson.build | 1 + Makefile | 1 + builtin.h | 1 + builtin/diff-pairs.c | 193 ++++++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + meson.build | 1 + t/meson.build | 1 + t/t4070-diff-pairs.sh | 81 +++++++++++++ 11 files changed, 338 insertions(+) create mode 100644 Documentation/git-diff-pairs.adoc create mode 100644 builtin/diff-pairs.c create mode 100755 t/t4070-diff-pairs.sh diff --git a/.gitignore b/.gitignore index 08a66ca50844c5..04c444404e4ba8 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ /git-diff /git-diff-files /git-diff-index +/git-diff-pairs /git-diff-tree /git-difftool /git-difftool--helper diff --git a/Documentation/git-diff-pairs.adoc b/Documentation/git-diff-pairs.adoc new file mode 100644 index 00000000000000..e31f2e2fbbc527 --- /dev/null +++ b/Documentation/git-diff-pairs.adoc @@ -0,0 +1,56 @@ +git-diff-pairs(1) +================= + +NAME +---- +git-diff-pairs - Compare the content and mode of provided blob pairs + +SYNOPSIS +-------- +[synopsis] +git diff-pairs -z [] + +DESCRIPTION +----------- +Show changes for file pairs provided on stdin. Input for this command must be +in the NUL-terminated raw output format as generated by commands such as `git +diff-tree -z -r --raw`. By default, the outputted diffs are computed and shown +in the patch format when stdin closes. + +Usage of this command enables the traditional diff pipeline to be broken up +into separate stages where `diff-pairs` acts as the output phase. Other +commands, such as `diff-tree`, may serve as a frontend to compute the raw +diff format used as input. + +Instead of computing diffs via `git diff-tree -p -M` in one step, `diff-tree` +can compute the file pairs and rename information without the blob diffs. 
This +output can be fed to `diff-pairs` to generate the underlying blob diffs as done +in the following example: + +----------------------------- +git diff-tree -z -r -M $a $b | +git diff-pairs -z +----------------------------- + +Computing the tree diff upfront with rename information allows patch output +from `diff-pairs` to be progressively computed over the course of potentially +multiple invocations. + +Pathspecs are not currently supported by `diff-pairs`. Pathspec limiting should +be performed by the upstream command generating the raw diffs used as input. + +Tree objects are not currently supported as input and are rejected. + +Abbreviated object IDs in the `diff-pairs` input are not supported. Outputted +object IDs can be abbreviated using the `--abbrev` option. + +OPTIONS +------- + +include::diff-options.adoc[] + +include::diff-generate-patch.adoc[] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index 1129ce4c85676d..ce990e9fe531ab 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -42,6 +42,7 @@ manpages = { 'git-diagnose.adoc' : 1, 'git-diff-files.adoc' : 1, 'git-diff-index.adoc' : 1, + 'git-diff-pairs.adoc' : 1, 'git-difftool.adoc' : 1, 'git-diff-tree.adoc' : 1, 'git-diff.adoc' : 1, diff --git a/Makefile b/Makefile index 6d45093089d136..ee48bcad20979e 100644 --- a/Makefile +++ b/Makefile @@ -1242,6 +1242,7 @@ BUILTIN_OBJS += builtin/describe.o BUILTIN_OBJS += builtin/diagnose.o BUILTIN_OBJS += builtin/diff-files.o BUILTIN_OBJS += builtin/diff-index.o +BUILTIN_OBJS += builtin/diff-pairs.o BUILTIN_OBJS += builtin/diff-tree.o BUILTIN_OBJS += builtin/diff.o BUILTIN_OBJS += builtin/difftool.o diff --git a/builtin.h b/builtin.h index 89928ccf92f532..e6aad3a6a1f0ee 100644 --- a/builtin.h +++ b/builtin.h @@ -153,6 +153,7 @@ int cmd_diagnose(int argc, const char **argv, const char *prefix, struct reposit int cmd_diff_files(int argc, const char **argv, const char *prefix, 
struct repository *repo); int cmd_diff_index(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_diff(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_diff_pairs(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_diff_tree(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_difftool(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_env__helper(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/diff-pairs.c b/builtin/diff-pairs.c new file mode 100644 index 00000000000000..6be17c1abd4ea0 --- /dev/null +++ b/builtin/diff-pairs.c @@ -0,0 +1,193 @@ +#include "builtin.h" +#include "config.h" +#include "diff.h" +#include "diffcore.h" +#include "gettext.h" +#include "hash.h" +#include "hex.h" +#include "object.h" +#include "parse-options.h" +#include "revision.h" +#include "strbuf.h" + +static unsigned parse_mode_or_die(const char *mode, const char **end) +{ + uint16_t ret; + + *end = parse_mode(mode, &ret); + if (!*end) + die(_("unable to parse mode: %s"), mode); + return ret; +} + +static void parse_oid_or_die(const char *hex, struct object_id *oid, + const char **end, const struct git_hash_algo *algop) +{ + if (parse_oid_hex_algop(hex, oid, end, algop) || *(*end)++ != ' ') + die(_("unable to parse object id: %s"), hex); +} + +int cmd_diff_pairs(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + struct strbuf path_dst = STRBUF_INIT; + struct strbuf path = STRBUF_INIT; + struct strbuf meta = STRBUF_INIT; + struct option *parseopts; + struct rev_info revs; + int line_term = '\0'; + int ret; + + const char * const builtin_diff_pairs_usage[] = { + N_("git diff-pairs -z []"), + NULL + }; + struct option builtin_diff_pairs_options[] = { + OPT_END() + }; + + repo_init_revisions(repo, &revs, prefix); + + /* + * Diff options are usually parsed 
implicitly as part of + * setup_revisions(). Explicitly handle parsing to ensure options are + * printed in the usage message. + */ + parseopts = add_diff_options(builtin_diff_pairs_options, &revs.diffopt); + show_usage_with_options_if_asked(argc, argv, builtin_diff_pairs_usage, parseopts); + + repo_config(repo, git_diff_basic_config, NULL); + revs.disable_stdin = 1; + revs.abbrev = 0; + revs.diff = 1; + + argc = parse_options(argc, argv, prefix, parseopts, builtin_diff_pairs_usage, + PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_DASHDASH); + + if (setup_revisions(argc, argv, &revs, NULL) > 1) + usagef(_("unrecognized argument: %s"), argv[0]); + + /* + * With the -z option, both command input and raw output are + * NUL-delimited (this mode does not affect patch output). At present + * only NUL-delimited raw diff formatted input is supported. + */ + if (revs.diffopt.line_termination) + usage(_("working without -z is not supported")); + + if (revs.prune_data.nr) + usage(_("pathspec arguments not supported")); + + if (revs.pending.nr || revs.max_count != -1 || + revs.min_age != (timestamp_t)-1 || + revs.max_age != (timestamp_t)-1) + usage(_("revision arguments not allowed")); + + if (!revs.diffopt.output_format) + revs.diffopt.output_format = DIFF_FORMAT_PATCH; + + /* + * If rename detection is not requested, use rename information from the + * raw diff formatted input. Setting skip_resolving_statuses ensures + * diffcore_std() does not mess with rename information already present + * in queued filepairs. 
+ */ + if (!revs.diffopt.detect_rename) + revs.diffopt.skip_resolving_statuses = 1; + + while (1) { + struct object_id oid_a, oid_b; + struct diff_filepair *pair; + unsigned mode_a, mode_b; + const char *p; + char status; + + if (strbuf_getwholeline(&meta, stdin, line_term) == EOF) + break; + + p = meta.buf; + if (*p != ':') + die(_("invalid raw diff input")); + p++; + + mode_a = parse_mode_or_die(p, &p); + mode_b = parse_mode_or_die(p, &p); + + if (S_ISDIR(mode_a) || S_ISDIR(mode_b)) + die(_("tree objects not supported")); + + parse_oid_or_die(p, &oid_a, &p, repo->hash_algo); + parse_oid_or_die(p, &oid_b, &p, repo->hash_algo); + + status = *p++; + + if (strbuf_getwholeline(&path, stdin, line_term) == EOF) + die(_("got EOF while reading path")); + + switch (status) { + case DIFF_STATUS_ADDED: + pair = diff_queue_addremove(&diff_queued_diff, + &revs.diffopt, '+', mode_b, + &oid_b, 1, path.buf, 0); + if (pair) + pair->status = status; + break; + + case DIFF_STATUS_DELETED: + pair = diff_queue_addremove(&diff_queued_diff, + &revs.diffopt, '-', mode_a, + &oid_a, 1, path.buf, 0); + if (pair) + pair->status = status; + break; + + case DIFF_STATUS_TYPE_CHANGED: + case DIFF_STATUS_MODIFIED: + pair = diff_queue_change(&diff_queued_diff, &revs.diffopt, + mode_a, mode_b, &oid_a, &oid_b, + 1, 1, path.buf, 0, 0); + if (pair) + pair->status = status; + break; + + case DIFF_STATUS_RENAMED: + case DIFF_STATUS_COPIED: { + struct diff_filespec *a, *b; + unsigned int score; + + if (strbuf_getwholeline(&path_dst, stdin, line_term) == EOF) + die(_("got EOF while reading destination path")); + + a = alloc_filespec(path.buf); + b = alloc_filespec(path_dst.buf); + fill_filespec(a, &oid_a, 1, mode_a); + fill_filespec(b, &oid_b, 1, mode_b); + + pair = diff_queue(&diff_queued_diff, a, b); + + if (strtoul_ui(p, 10, &score)) + die(_("unable to parse rename/copy score: %s"), p); + + pair->score = score * MAX_SCORE / 100; + pair->status = status; + pair->renamed_pair = 1; + } + break; + + 
default: + die(_("unknown diff status: %c"), status); + } + } + + diffcore_std(&revs.diffopt); + diff_flush(&revs.diffopt); + ret = diff_result_code(&revs); + + strbuf_release(&path_dst); + strbuf_release(&path); + strbuf_release(&meta); + release_revisions(&revs); + FREE_AND_NULL(parseopts); + + return ret; +} diff --git a/command-list.txt b/command-list.txt index c537114b4687b8..b7ade3ab9f3319 100644 --- a/command-list.txt +++ b/command-list.txt @@ -96,6 +96,7 @@ git-diagnose ancillaryinterrogators git-diff mainporcelain info git-diff-files plumbinginterrogators git-diff-index plumbinginterrogators +git-diff-pairs plumbinginterrogators git-diff-tree plumbinginterrogators git-difftool ancillaryinterrogators complete git-fast-export ancillarymanipulators diff --git a/git.c b/git.c index 450d6aaa863de4..77c435952232f6 100644 --- a/git.c +++ b/git.c @@ -541,6 +541,7 @@ static struct cmd_struct commands[] = { { "diff", cmd_diff, NO_PARSEOPT }, { "diff-files", cmd_diff_files, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT }, { "diff-index", cmd_diff_index, RUN_SETUP | NO_PARSEOPT }, + { "diff-pairs", cmd_diff_pairs, RUN_SETUP | NO_PARSEOPT }, { "diff-tree", cmd_diff_tree, RUN_SETUP | NO_PARSEOPT }, { "difftool", cmd_difftool, RUN_SETUP_GENTLY }, { "fast-export", cmd_fast_export, RUN_SETUP }, diff --git a/meson.build b/meson.build index 021a182135f399..383e304798e7ae 100644 --- a/meson.build +++ b/meson.build @@ -540,6 +540,7 @@ builtin_sources = [ 'builtin/diagnose.c', 'builtin/diff-files.c', 'builtin/diff-index.c', + 'builtin/diff-pairs.c', 'builtin/diff-tree.c', 'builtin/diff.c', 'builtin/difftool.c', diff --git a/t/meson.build b/t/meson.build index 780939d49fda56..09c7bc2fad28fa 100644 --- a/t/meson.build +++ b/t/meson.build @@ -500,6 +500,7 @@ integration_tests = [ 't4067-diff-partial-clone.sh', 't4068-diff-symmetric-merge-base.sh', 't4069-remerge-diff.sh', + 't4070-diff-pairs.sh', 't4100-apply-stat.sh', 't4101-apply-nonl.sh', 't4102-apply-rename.sh', diff --git 
a/t/t4070-diff-pairs.sh b/t/t4070-diff-pairs.sh new file mode 100755 index 00000000000000..0878ad0ad168cd --- /dev/null +++ b/t/t4070-diff-pairs.sh @@ -0,0 +1,81 @@ +#!/bin/sh + +test_description='basic diff-pairs tests' +. ./test-lib.sh + +# This creates a diff with added, modified, deleted, renamed, copied, and +# typechange entries. This includes a submodule to test submodule diff support. +test_expect_success 'setup' ' + test_config_global protocol.file.allow always && + git init sub && + test_commit -C sub initial && + + git init main && + cd main && + echo to-be-gone >deleted && + echo original >modified && + echo now-a-file >symlink && + test_seq 200 >two-hundred && + test_seq 201 500 >five-hundred && + git add . && + test_tick && + git commit -m base && + git tag base && + + git submodule add ../sub && + echo now-here >added && + echo new >modified && + rm deleted && + mkdir subdir && + echo content >subdir/file && + mv two-hundred renamed && + test_seq 201 500 | sed s/300/modified/ >copied && + rm symlink && + git add -A . 
&& + test_ln_s_add dest symlink && + test_tick && + git commit -m new && + git tag new +' + +test_expect_success 'diff-pairs recreates --raw' ' + git diff-tree -r -M -C -C -z base new >expect && + git diff-pairs --raw -z >actual <expect && + test_cmp expect actual +' + +test_expect_success 'diff-pairs can create -p output' ' + git diff-tree -p -M -C -C base new >expect && + git diff-tree -r -M -C -C -z base new | + git diff-pairs -p -z >actual && + test_cmp expect actual +' + +test_expect_success 'diff-pairs does not support normal raw diff input' ' + git diff-tree -r base new | + test_must_fail git diff-pairs >out 2>err && + + echo "usage: working without -z is not supported" >expect && + test_must_be_empty out && + test_cmp expect err +' + +test_expect_success 'diff-pairs does not support tree objects as input' ' + git diff-tree -z base new | + test_must_fail git diff-pairs -z >out 2>err && + + echo "fatal: tree objects not supported" >expect && + test_must_be_empty out && + test_cmp expect err +' + +test_expect_success 'diff-pairs does not support pathspec arguments' ' + git diff-tree -r -z base new | + test_must_fail git diff-pairs -z -- new >out 2>err && + + echo "usage: pathspec arguments not supported" >expect && + test_must_be_empty out && + test_cmp expect err +' + +test_done From cf15095ec52f1e997ec9eef42bdf275d2a68fe66 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Fri, 28 Feb 2025 15:33:46 -0600 Subject: [PATCH 043/775] builtin/diff-pairs: allow explicit diff queue flush The diffs queued from git-diff-pairs(1) are flushed when stdin is closed. To enable greater flexibility, allow control over when the diff queue is flushed by writing a single NUL byte on stdin between input file pairs. Diff output between flushes is separated by a single NUL byte.
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-diff-pairs.adoc | 4 ++++ builtin/diff-pairs.c | 14 ++++++++++++++ t/t4070-diff-pairs.sh | 9 +++++++++ 3 files changed, 27 insertions(+) diff --git a/Documentation/git-diff-pairs.adoc b/Documentation/git-diff-pairs.adoc index e31f2e2fbbc527..f99fcd1ead2479 100644 --- a/Documentation/git-diff-pairs.adoc +++ b/Documentation/git-diff-pairs.adoc @@ -17,6 +17,10 @@ in the NUL-terminated raw output format as generated by commands such as `git diff-tree -z -r --raw`. By default, the outputted diffs are computed and shown in the patch format when stdin closes. +A single NUL byte may be written to stdin between raw input lines to compute +file pair diffs up to that point instead of waiting for stdin to close. A NUL +byte is also written to the output to delimit between these batches of diffs. + Usage of this command enables the traditional diff pipeline to be broken up into separate stages where `diff-pairs` acts as the output phase. Other commands, such as `diff-tree`, may serve as a frontend to compute the raw diff --git a/builtin/diff-pairs.c b/builtin/diff-pairs.c index 6be17c1abd4ea0..71c045331ab729 100644 --- a/builtin/diff-pairs.c +++ b/builtin/diff-pairs.c @@ -57,6 +57,7 @@ int cmd_diff_pairs(int argc, const char **argv, const char *prefix, show_usage_with_options_if_asked(argc, argv, builtin_diff_pairs_usage, parseopts); repo_config(repo, git_diff_basic_config, NULL); + revs.diffopt.no_free = 1; revs.disable_stdin = 1; revs.abbrev = 0; revs.diff = 1; @@ -106,6 +107,18 @@ int cmd_diff_pairs(int argc, const char **argv, const char *prefix, break; p = meta.buf; + if (!*p) { + diffcore_std(&revs.diffopt); + diff_flush(&revs.diffopt); + /* + * When the diff queue is explicitly flushed, append a + * NUL byte to separate batches of diffs. 
+ */ + fputc('\0', revs.diffopt.file); + fflush(revs.diffopt.file); + continue; + } + if (*p != ':') die(_("invalid raw diff input")); p++; @@ -179,6 +192,7 @@ int cmd_diff_pairs(int argc, const char **argv, const char *prefix, } } + revs.diffopt.no_free = 0; diffcore_std(&revs.diffopt); diff_flush(&revs.diffopt); ret = diff_result_code(&revs); diff --git a/t/t4070-diff-pairs.sh b/t/t4070-diff-pairs.sh index 0878ad0ad168cd..70deafb860e57f 100755 --- a/t/t4070-diff-pairs.sh +++ b/t/t4070-diff-pairs.sh @@ -78,4 +78,13 @@ test_expect_success 'diff-pairs does not support pathspec arguments' ' test_cmp expect err ' +test_expect_success 'diff-pairs explicit queue flush' ' + git diff-tree -r -M -C -C -z base new >expect && + printf "\0" >>expect && + git diff-tree -r -M -C -C -z base new >>expect && + + git diff-pairs --raw -z <expect >actual && + test_cmp expect actual +' + test_done From bc9342771b7438ddf785960cd418ab8c6a7aebef Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Tue, 4 Mar 2025 12:33:22 +0100 Subject: [PATCH 044/775] t/unit-tests: convert trailer test to use clar Adapt trailer test file to use clar testing framework by using clar assertions where necessary. Split test into individual test functions for clarity and maintainability. Each test case now has its own function, making it easier to isolate failures and improve test readability.
Mentored-by: Patrick Steinhardt Mentored-by: Phillip Wood Signed-off-by: Seyi Kuforiji Signed-off-by: Junio C Hamano --- Makefile | 2 +- t/meson.build | 2 +- t/unit-tests/t-trailer.c | 317 -------------------------------------- t/unit-tests/u-trailer.c | 320 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 322 insertions(+), 319 deletions(-) delete mode 100644 t/unit-tests/t-trailer.c create mode 100644 t/unit-tests/u-trailer.c diff --git a/Makefile b/Makefile index a9b2de06922798..9cb68aaa61ec14 100644 --- a/Makefile +++ b/Makefile @@ -1361,6 +1361,7 @@ CLAR_TEST_SUITES += u-reftable-tree CLAR_TEST_SUITES += u-strbuf CLAR_TEST_SUITES += u-strcmp-offset CLAR_TEST_SUITES += u-strvec +CLAR_TEST_SUITES += u-trailer CLAR_TEST_PROG = $(UNIT_TEST_BIN)/unit-tests$(X) CLAR_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(CLAR_TEST_SUITES)) CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/clar/clar.o @@ -1377,7 +1378,6 @@ UNIT_TEST_PROGRAMS += t-reftable-reader UNIT_TEST_PROGRAMS += t-reftable-readwrite UNIT_TEST_PROGRAMS += t-reftable-record UNIT_TEST_PROGRAMS += t-reftable-stack -UNIT_TEST_PROGRAMS += t-trailer UNIT_TEST_PROGRAMS += t-urlmatch-normalization UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS)) UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o diff --git a/t/meson.build b/t/meson.build index 25ce07270783f6..560aa9a1f1bc48 100644 --- a/t/meson.build +++ b/t/meson.build @@ -9,6 +9,7 @@ clar_test_suites = [ 'unit-tests/u-strbuf.c', 'unit-tests/u-strcmp-offset.c', 'unit-tests/u-strvec.c', + 'unit-tests/u-trailer.c', ] clar_sources = [ @@ -59,7 +60,6 @@ unit_test_programs = [ 'unit-tests/t-reftable-readwrite.c', 'unit-tests/t-reftable-record.c', 'unit-tests/t-reftable-stack.c', - 'unit-tests/t-trailer.c', 'unit-tests/t-urlmatch-normalization.c', ] diff --git a/t/unit-tests/t-trailer.c b/t/unit-tests/t-trailer.c deleted file mode 100644 index 184593e73d6e23..00000000000000 --- a/t/unit-tests/t-trailer.c +++ /dev/null @@ -1,317 +0,0 @@ -#define 
DISABLE_SIGN_COMPARE_WARNINGS - -#include "test-lib.h" -#include "trailer.h" - -struct contents { - const char *raw; - const char *key; - const char *val; -}; - -static void t_trailer_iterator(const char *msg, size_t num_expected, - struct contents *contents) -{ - struct trailer_iterator iter; - size_t i = 0; - - trailer_iterator_init(&iter, msg); - while (trailer_iterator_advance(&iter)) { - if (num_expected) { - check_str(iter.raw, contents[i].raw); - check_str(iter.key.buf, contents[i].key); - check_str(iter.val.buf, contents[i].val); - } - i++; - } - trailer_iterator_release(&iter); - - check_uint(i, ==, num_expected); -} - -static void run_t_trailer_iterator(void) -{ - - static struct test_cases { - const char *name; - const char *msg; - size_t num_expected; - struct contents contents[10]; - } tc[] = { - { - "empty input", - "", - 0, - {{0}}, - }, - { - "no newline at beginning", - "Fixes: x\n" - "Acked-by: x\n" - "Reviewed-by: x\n", - 0, - {{0}}, - }, - { - "newline at beginning", - "\n" - "Fixes: x\n" - "Acked-by: x\n" - "Reviewed-by: x\n", - 3, - { - { - .raw = "Fixes: x\n", - .key = "Fixes", - .val = "x", - }, - { - .raw = "Acked-by: x\n", - .key = "Acked-by", - .val = "x", - }, - { - .raw = "Reviewed-by: x\n", - .key = "Reviewed-by", - .val = "x", - }, - { - 0 - }, - }, - }, - { - "without body text", - "subject: foo bar\n" - "\n" - "Fixes: x\n" - "Acked-by: x\n" - "Reviewed-by: x\n", - 3, - { - { - .raw = "Fixes: x\n", - .key = "Fixes", - .val = "x", - }, - { - .raw = "Acked-by: x\n", - .key = "Acked-by", - .val = "x", - }, - { - .raw = "Reviewed-by: x\n", - .key = "Reviewed-by", - .val = "x", - }, - { - 0 - }, - }, - }, - { - "with body text, without divider", - "my subject\n" - "\n" - "my body which is long\n" - "and contains some special\n" - "chars like : = ? 
!\n" - "hello\n" - "\n" - "Fixes: x\n" - "Acked-by: x\n" - "Reviewed-by: x\n" - "Signed-off-by: x\n", - 4, - { - { - .raw = "Fixes: x\n", - .key = "Fixes", - .val = "x", - }, - { - .raw = "Acked-by: x\n", - .key = "Acked-by", - .val = "x", - }, - { - .raw = "Reviewed-by: x\n", - .key = "Reviewed-by", - .val = "x", - }, - { - .raw = "Signed-off-by: x\n", - .key = "Signed-off-by", - .val = "x", - }, - { - 0 - }, - }, - }, - { - "with body text, without divider (second trailer block)", - "my subject\n" - "\n" - "my body which is long\n" - "and contains some special\n" - "chars like : = ? !\n" - "hello\n" - "\n" - "Fixes: x\n" - "Acked-by: x\n" - "Reviewed-by: x\n" - "Signed-off-by: x\n" - "\n" - /* - * Because this is the last trailer block, it takes - * precedence over the first one encountered above. - */ - "Helped-by: x\n" - "Signed-off-by: x\n", - 2, - { - { - .raw = "Helped-by: x\n", - .key = "Helped-by", - .val = "x", - }, - { - .raw = "Signed-off-by: x\n", - .key = "Signed-off-by", - .val = "x", - }, - { - 0 - }, - }, - }, - { - "with body text, with divider", - "my subject\n" - "\n" - "my body which is long\n" - "and contains some special\n" - "chars like : = ? !\n" - "hello\n" - "\n" - "---\n" - "\n" - /* - * This trailer still counts because the iterator - * always ignores the divider. - */ - "Signed-off-by: x\n", - 1, - { - { - .raw = "Signed-off-by: x\n", - .key = "Signed-off-by", - .val = "x", - }, - { - 0 - }, - }, - }, - { - "with non-trailer lines in trailer block", - "subject: foo bar\n" - "\n" - /* - * Even though this trailer block has a non-trailer line - * in it, it's still a valid trailer block because it's - * at least 25% trailers and is Git-generated (see - * git_generated_prefixes[] in trailer.c). 
- */ - "not a trailer line\n" - "not a trailer line\n" - "not a trailer line\n" - "Signed-off-by: x\n", - /* - * Even though there is only really 1 real "trailer" - * (Signed-off-by), we still have 4 trailer objects - * because we still want to iterate through the entire - * block. - */ - 4, - { - { - .raw = "not a trailer line\n", - .key = "not a trailer line", - .val = "", - }, - { - .raw = "not a trailer line\n", - .key = "not a trailer line", - .val = "", - }, - { - .raw = "not a trailer line\n", - .key = "not a trailer line", - .val = "", - }, - { - .raw = "Signed-off-by: x\n", - .key = "Signed-off-by", - .val = "x", - }, - { - 0 - }, - }, - }, - { - "with non-trailer lines (one too many) in trailer block", - "subject: foo bar\n" - "\n" - /* - * This block has only 20% trailers, so it's below the - * 25% threshold. - */ - "not a trailer line\n" - "not a trailer line\n" - "not a trailer line\n" - "not a trailer line\n" - "Signed-off-by: x\n", - 0, - {{0}}, - }, - { - "with non-trailer lines (only 1) in trailer block, but no Git-generated trailers", - "subject: foo bar\n" - "\n" - /* - * This block has only 1 non-trailer out of 10 (IOW, 90% - * trailers) but is not considered a trailer block - * because the 25% threshold only applies to cases where - * there was a Git-generated trailer. 
- */ - "Reviewed-by: x\n" - "Reviewed-by: x\n" - "Reviewed-by: x\n" - "Helped-by: x\n" - "Helped-by: x\n" - "Helped-by: x\n" - "Acked-by: x\n" - "Acked-by: x\n" - "Acked-by: x\n" - "not a trailer line\n", - 0, - {{0}}, - }, - }; - - for (int i = 0; i < sizeof(tc) / sizeof(tc[0]); i++) { - TEST(t_trailer_iterator(tc[i].msg, - tc[i].num_expected, - tc[i].contents), - "%s", tc[i].name); - } -} - -int cmd_main(int argc UNUSED, const char **argv UNUSED) -{ - run_t_trailer_iterator(); - return test_done(); -} diff --git a/t/unit-tests/u-trailer.c b/t/unit-tests/u-trailer.c new file mode 100644 index 00000000000000..3d60ea1603dbda --- /dev/null +++ b/t/unit-tests/u-trailer.c @@ -0,0 +1,320 @@ +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "unit-test.h" +#include "trailer.h" + +struct contents { + const char *raw; + const char *key; + const char *val; +}; + +static void t_trailer_iterator(const char *msg, size_t num_expected, + struct contents *contents) +{ + struct trailer_iterator iter; + size_t i = 0; + + trailer_iterator_init(&iter, msg); + while (trailer_iterator_advance(&iter)) { + if (num_expected) { + cl_assert_equal_s(iter.raw, contents[i].raw); + cl_assert_equal_s(iter.key.buf, contents[i].key); + cl_assert_equal_s(iter.val.buf, contents[i].val); + } + i++; + } + trailer_iterator_release(&iter); + + cl_assert_equal_i(i, num_expected); +} + +void test_trailer__empty_input(void) +{ + struct contents expected_contents[] = { 0 }; + t_trailer_iterator("", 0, expected_contents); +} + +void test_trailer__no_newline_start(void) +{ + struct contents expected_contents[] = { 0 }; + + t_trailer_iterator("Fixes: x\n" + "Acked-by: x\n" + "Reviewed-by: x\n", + 0, + expected_contents); +} + +void test_trailer__newline_start(void) +{ + struct contents expected_contents[] = { + { + .raw = "Fixes: x\n", + .key = "Fixes", + .val = "x", + }, + { + .raw = "Acked-by: x\n", + .key = "Acked-by", + .val = "x", + }, + { + .raw = "Reviewed-by: x\n", + .key = "Reviewed-by", + .val = "x", 
+ }, + { + 0 + }, + }; + + t_trailer_iterator("\n" + "Fixes: x\n" + "Acked-by: x\n" + "Reviewed-by: x\n", + 3, + expected_contents); +} + +void test_trailer__no_body_text(void) +{ + struct contents expected_contents[] = { + + { + .raw = "Fixes: x\n", + .key = "Fixes", + .val = "x", + }, + { + .raw = "Acked-by: x\n", + .key = "Acked-by", + .val = "x", + }, + { + .raw = "Reviewed-by: x\n", + .key = "Reviewed-by", + .val = "x", + }, + { + 0 + }, + }; + + t_trailer_iterator("subject: foo bar\n" + "\n" + "Fixes: x\n" + "Acked-by: x\n" + "Reviewed-by: x\n", + 3, + expected_contents); +} + +void test_trailer__body_text_no_divider(void) +{ + struct contents expected_contents[] = { + { + .raw = "Fixes: x\n", + .key = "Fixes", + .val = "x", + }, + { + .raw = "Acked-by: x\n", + .key = "Acked-by", + .val = "x", + }, + { + .raw = "Reviewed-by: x\n", + .key = "Reviewed-by", + .val = "x", + }, + { + .raw = "Signed-off-by: x\n", + .key = "Signed-off-by", + .val = "x", + }, + { + 0 + }, + }; + + t_trailer_iterator("my subject\n" + "\n" + "my body which is long\n" + "and contains some special\n" + "chars like : = ? !\n" + "hello\n" + "\n" + "Fixes: x\n" + "Acked-by: x\n" + "Reviewed-by: x\n" + "Signed-off-by: x\n", + 4, + expected_contents); +} + +void test_trailer__body_no_divider_2nd_block(void) +{ + struct contents expected_contents[] = { + { + .raw = "Helped-by: x\n", + .key = "Helped-by", + .val = "x", + }, + { + .raw = "Signed-off-by: x\n", + .key = "Signed-off-by", + .val = "x", + }, + { + 0 + }, + }; + + t_trailer_iterator("my subject\n" + "\n" + "my body which is long\n" + "and contains some special\n" + "chars like : = ? !\n" + "hello\n" + "\n" + "Fixes: x\n" + "Acked-by: x\n" + "Reviewed-by: x\n" + "Signed-off-by: x\n" + "\n" + /* + * Because this is the last trailer block, it takes + * precedence over the first one encountered above. 
+ */ + "Helped-by: x\n" + "Signed-off-by: x\n", + 2, + expected_contents); +} + +void test_trailer__body_and_divider(void) +{ + struct contents expected_contents[] = { + { + .raw = "Signed-off-by: x\n", + .key = "Signed-off-by", + .val = "x", + }, + { + 0 + }, + }; + + t_trailer_iterator("my subject\n" + "\n" + "my body which is long\n" + "and contains some special\n" + "chars like : = ? !\n" + "hello\n" + "\n" + "---\n" + "\n" + /* + * This trailer still counts because the iterator + * always ignores the divider. + */ + "Signed-off-by: x\n", + 1, + expected_contents); +} + +void test_trailer__non_trailer_in_block(void) +{ + struct contents expected_contents[] = { + { + .raw = "not a trailer line\n", + .key = "not a trailer line", + .val = "", + }, + { + .raw = "not a trailer line\n", + .key = "not a trailer line", + .val = "", + }, + { + .raw = "not a trailer line\n", + .key = "not a trailer line", + .val = "", + }, + { + .raw = "Signed-off-by: x\n", + .key = "Signed-off-by", + .val = "x", + }, + { + 0 + }, + }; + + t_trailer_iterator("subject: foo bar\n" + "\n" + /* + * Even though this trailer block has a non-trailer line + * in it, it's still a valid trailer block because it's + * at least 25% trailers and is Git-generated (see + * git_generated_prefixes[] in trailer.c). + */ + "not a trailer line\n" + "not a trailer line\n" + "not a trailer line\n" + "Signed-off-by: x\n", + /* + * Even though there is only really 1 real "trailer" + * (Signed-off-by), we still have 4 trailer objects + * because we still want to iterate through the entire + * block. + */ + 4, + expected_contents); +} + +void test_trailer__too_many_non_trailers(void) +{ + struct contents expected_contents[] = { 0 }; + + t_trailer_iterator("subject: foo bar\n" + "\n" + /* + * This block has only 20% trailers, so it's below the + * 25% threshold. 
+ */ + "not a trailer line\n" + "not a trailer line\n" + "not a trailer line\n" + "not a trailer line\n" + "Signed-off-by: x\n", + 0, + expected_contents); +} + +void test_trailer__one_non_trailer_no_git_trailers(void) +{ + struct contents expected_contents[] = { 0 }; + + t_trailer_iterator("subject: foo bar\n" + "\n" + /* + * This block has only 1 non-trailer out of 10 (IOW, 90% + * trailers) but is not considered a trailer block + * because the 25% threshold only applies to cases where + * there was a Git-generated trailer. + */ + "Reviewed-by: x\n" + "Reviewed-by: x\n" + "Reviewed-by: x\n" + "Helped-by: x\n" + "Helped-by: x\n" + "Helped-by: x\n" + "Acked-by: x\n" + "Acked-by: x\n" + "Acked-by: x\n" + "not a trailer line\n", + 0, + expected_contents); +} From 7d4212b8f376ddf1aa403e51b46c92106d079b2d Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Tue, 4 Mar 2025 12:33:23 +0100 Subject: [PATCH 045/775] t/unit-tests: convert urlmatch-normalization test to clar Adapt urlmatch-normalization test file to use clar testing framework by using clar assertions where necessary. 
Mentored-by: Patrick Steinhardt Mentored-by: Phillip Wood Signed-off-by: Seyi Kuforiji Signed-off-by: Junio C Hamano --- Makefile | 2 +- t/meson.build | 2 +- ...alization.c => u-urlmatch-normalization.c} | 60 ++++++------------- 3 files changed, 20 insertions(+), 44 deletions(-) rename t/unit-tests/{t-urlmatch-normalization.c => u-urlmatch-normalization.c} (84%) diff --git a/Makefile b/Makefile index 9cb68aaa61ec14..a4787bff5dc976 100644 --- a/Makefile +++ b/Makefile @@ -1362,6 +1362,7 @@ CLAR_TEST_SUITES += u-strbuf CLAR_TEST_SUITES += u-strcmp-offset CLAR_TEST_SUITES += u-strvec CLAR_TEST_SUITES += u-trailer +CLAR_TEST_SUITES += u-urlmatch-normalization CLAR_TEST_PROG = $(UNIT_TEST_BIN)/unit-tests$(X) CLAR_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(CLAR_TEST_SUITES)) CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/clar/clar.o @@ -1378,7 +1379,6 @@ UNIT_TEST_PROGRAMS += t-reftable-reader UNIT_TEST_PROGRAMS += t-reftable-readwrite UNIT_TEST_PROGRAMS += t-reftable-record UNIT_TEST_PROGRAMS += t-reftable-stack -UNIT_TEST_PROGRAMS += t-urlmatch-normalization UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS)) UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/lib-oid.o diff --git a/t/meson.build b/t/meson.build index 560aa9a1f1bc48..e6ac23df11aecc 100644 --- a/t/meson.build +++ b/t/meson.build @@ -10,6 +10,7 @@ clar_test_suites = [ 'unit-tests/u-strcmp-offset.c', 'unit-tests/u-strvec.c', 'unit-tests/u-trailer.c', + 'unit-tests/u-urlmatch-normalization.c', ] clar_sources = [ @@ -60,7 +61,6 @@ unit_test_programs = [ 'unit-tests/t-reftable-readwrite.c', 'unit-tests/t-reftable-record.c', 'unit-tests/t-reftable-stack.c', - 'unit-tests/t-urlmatch-normalization.c', ] foreach unit_test_program : unit_test_programs diff --git a/t/unit-tests/t-urlmatch-normalization.c b/t/unit-tests/u-urlmatch-normalization.c similarity index 84% rename from t/unit-tests/t-urlmatch-normalization.c rename to t/unit-tests/u-urlmatch-normalization.c 
index 1769c357b97d4b..39f6e1ba26f3e5 100644 --- a/t/unit-tests/t-urlmatch-normalization.c +++ b/t/unit-tests/u-urlmatch-normalization.c @@ -1,12 +1,11 @@ -#include "test-lib.h" +#include "unit-test.h" #include "urlmatch.h" static void check_url_normalizable(const char *url, unsigned int normalizable) { char *url_norm = url_normalize(url, NULL); - if (!check_int(normalizable, ==, url_norm ? 1 : 0)) - test_msg("input url: %s", url); + cl_assert_equal_i(normalizable, url_norm ? 1 : 0); free(url_norm); } @@ -14,8 +13,7 @@ static void check_normalized_url(const char *url, const char *expect) { char *url_norm = url_normalize(url, NULL); - if (!check_str(url_norm, expect)) - test_msg("input url: %s", url); + cl_assert_equal_s(url_norm, expect); free(url_norm); } @@ -26,13 +24,9 @@ static void compare_normalized_urls(const char *url1, const char *url2, char *url2_norm = url_normalize(url2, NULL); if (equal) { - if (!check_str(url1_norm, url2_norm)) - test_msg("input url1: %s\n input url2: %s", url1, - url2); - } else if (!check_int(strcmp(url1_norm, url2_norm), !=, 0)) { - test_msg(" normalized url1: %s\n normalized url2: %s\n" - " input url1: %s\n input url2: %s", - url1_norm, url2_norm, url1, url2); + cl_assert_equal_s(url1_norm, url2_norm); + } else { + cl_assert(strcmp(url1_norm, url2_norm) != 0); } free(url1_norm); free(url2_norm); @@ -43,14 +37,12 @@ static void check_normalized_url_length(const char *url, size_t len) struct url_info info; char *url_norm = url_normalize(url, &info); - if (!check_int(info.url_len, ==, len)) - test_msg(" input url: %s\n normalized url: %s", url, - url_norm); + cl_assert_equal_i(info.url_len, len); free(url_norm); } /* Note that only "file:" URLs should be allowed without a host */ -static void t_url_scheme(void) +void test_urlmatch_normalization__scheme(void) { check_url_normalizable("", 0); check_url_normalizable("_", 0); @@ -73,7 +65,7 @@ static void t_url_scheme(void) check_normalized_url("AbCdeF://x.Y", "abcdef://x.y/"); } -static 
void t_url_authority(void) +void test_urlmatch_normalization__authority(void) { check_url_normalizable("scheme://user:pass@", 0); check_url_normalizable("scheme://?", 0); @@ -109,7 +101,7 @@ static void t_url_authority(void) check_url_normalizable("scheme://invalid....:[", 0); } -static void t_url_port(void) +void test_urlmatch_normalization__port(void) { check_url_normalizable("xyz://q@some.host:", 1); check_url_normalizable("xyz://q@some.host:456/", 1); @@ -139,7 +131,7 @@ static void t_url_port(void) check_url_normalizable("xyz://[::1]:030f/", 0); } -static void t_url_port_normalization(void) +void test_urlmatch_normalization__port_normalization(void) { check_normalized_url("http://x:800", "http://x:800/"); check_normalized_url("http://x:0800", "http://x:800/"); @@ -154,7 +146,7 @@ static void t_url_port_normalization(void) check_normalized_url("https://x:000000443", "https://x/"); } -static void t_url_general_escape(void) +void test_urlmatch_normalization__general_escape(void) { check_url_normalizable("http://x.y?%fg", 0); check_normalized_url("X://W/%7e%41^%3a", "x://w/~A%5E%3A"); @@ -164,7 +156,7 @@ static void t_url_general_escape(void) check_normalized_url("X://W?!", "x://w/?!"); } -static void t_url_high_bit(void) +void test_urlmatch_normalization__high_bit(void) { check_normalized_url( "x://q/\x01\x02\x03\x04\x05\x06\x07\x08\x0e\x0f\x10\x11\x12", @@ -198,26 +190,26 @@ static void t_url_high_bit(void) "x://q/%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF"); } -static void t_url_utf8_escape(void) +void test_urlmatch_normalization__utf8_escape(void) { check_normalized_url( "x://q/\xc2\x80\xdf\xbf\xe0\xa0\x80\xef\xbf\xbd\xf0\x90\x80\x80\xf0\xaf\xbf\xbd", "x://q/%C2%80%DF%BF%E0%A0%80%EF%BF%BD%F0%90%80%80%F0%AF%BF%BD"); } -static void t_url_username_pass(void) +void test_urlmatch_normalization__username_pass(void) { check_normalized_url("x://%41%62(^):%70+d@foo", "x://Ab(%5E):p+d@foo/"); } -static void t_url_length(void) +void 
test_urlmatch_normalization__length(void) { check_normalized_url_length("Http://%4d%65:%4d^%70@The.Host", 25); check_normalized_url_length("http://%41:%42@x.y/%61/", 17); check_normalized_url_length("http://@x.y/^", 15); } -static void t_url_dots(void) +void test_urlmatch_normalization__dots(void) { check_normalized_url("x://y/.", "x://y/"); check_normalized_url("x://y/./", "x://y/"); @@ -244,7 +236,7 @@ static void t_url_dots(void) * "http://foo" specifies neither a user name nor a password. * So they should not be equivalent. */ -static void t_url_equivalents(void) +void test_urlmatch_normalization__equivalents(void) { compare_normalized_urls("httP://x", "Http://X/", 1); compare_normalized_urls("Http://%4d%65:%4d^%70@The.Host", "hTTP://Me:%4D^p@the.HOST:80/", 1); @@ -253,19 +245,3 @@ static void t_url_equivalents(void) compare_normalized_urls("https://@x.y/^/../abc", "httpS://@x.y:0443/abc", 1); compare_normalized_urls("https://@x.y/^/..", "httpS://@x.y:0443/", 1); } - -int cmd_main(int argc UNUSED, const char **argv UNUSED) -{ - TEST(t_url_scheme(), "url scheme"); - TEST(t_url_authority(), "url authority"); - TEST(t_url_port(), "url port checks"); - TEST(t_url_port_normalization(), "url port normalization"); - TEST(t_url_general_escape(), "url general escapes"); - TEST(t_url_high_bit(), "url high-bit escapes"); - TEST(t_url_utf8_escape(), "url utf8 escapes"); - TEST(t_url_username_pass(), "url username/password escapes"); - TEST(t_url_length(), "url normalized lengths"); - TEST(t_url_dots(), "url . and .. segments"); - TEST(t_url_equivalents(), "url equivalents"); - return test_done(); -} From 107d8893032ef5b4e6216377752f0ac1d17a0615 Mon Sep 17 00:00:00 2001 From: Mahendra Dani Date: Tue, 4 Mar 2025 16:57:28 +0530 Subject: [PATCH 046/775] t1403: verify that path exists and is a file Verify that if the path exists then it is a file using test_path_is_file(). 
Signed-off-by: Mahendra Dani Signed-off-by: Junio C Hamano --- t/t1403-show-ref.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t1403-show-ref.sh b/t/t1403-show-ref.sh index 9d698b3cc35a7f..9da3650e91bac5 100755 --- a/t/t1403-show-ref.sh +++ b/t/t1403-show-ref.sh @@ -196,7 +196,7 @@ test_expect_success 'show-ref --verify with dangling ref' ' remove_object() { file=$(sha1_file "$*") && - test -e "$file" && + test_path_is_file "$file" && rm -f "$file" } && From 27be76b230b07360b64aec06d6b0b9bc9e993603 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 6 Mar 2025 10:34:48 -0500 Subject: [PATCH 047/775] refs.c: remove empty '--exclude' patterns In 59c35fac54 (refs/packed-backend.c: implement jump lists to avoid excluded pattern(s), 2023-07-10), the packed-refs backend learned how to construct "jump lists" to avoid enumerating sections of the packed-refs file that we know the caller is going to throw out anyway. This process works by finding the start- and end-points (that is, where in the packed-refs file corresponds to the range we're going to ignore) for each exclude pattern, then constructing a jump list based on that. At enumeration time we'll consult the jump list to skip past everything in the range(s) found in the previous step, saving time when excluding a large portion of references. But when there is a --exclude pattern which is just the empty string, the behavior is a little funky. When we try and exclude the empty string, the matched range covers the entire packed-refs file, meaning that we won't output any packed references. But the empty pattern doesn't actually match any references to begin with! For example, on my copy of git.git I can do: $ git for-each-ref '' | wc -l 0 So "git for-each-ref --exclude=''" shouldn't actually remove anything from the output, and ought to be equivalent to "git for-each-ref". 
But it's not, and in fact: $ git for-each-ref | wc -l 2229 $ git for-each-ref --exclude='' | wc -l 480 But why does the '--exclude' version output only some of the references in the repository? Here's a hint: $ find .git/refs -type f | wc -l 480 Indeed, because the files backend doesn't implement[^1] the same jump list concept as the packed backend we get the correct result for the loose references, but none of the packed references. Since the empty string exclude pattern doesn't match anything, we can discard them before the packed-refs backend has a chance to even see it (and likewise for reftable, which also implements a similar concept since 1869525066 (refs/reftable: wire up support for exclude patterns, 2024-09-16)). This approach (copying only some of the patterns into a strvec at the refs.c layer) may seem heavy-handed, but it's setting us up to fix another bug in the following commit where the fix will involve modifying the incoming patterns. [^1]: As noted in 59c35fac54. We technically could avoid opening and enumerating the contents of, for e.g., "$GIT_DIR/refs/heads/foo/" if we knew that we were excluding anything under the 'refs/heads/foo' hierarchy. But the --exclude stuff is all best-effort anyway, since the caller is expected to cull out any results that they don't want. 
Noticed-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- refs.c | 16 ++++++++++++++++ t/t1419-exclude-refs.sh | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/refs.c b/refs.c index 915aeb4d1dbb62..fa943d7d64b398 100644 --- a/refs.c +++ b/refs.c @@ -1560,6 +1560,20 @@ struct ref_iterator *refs_ref_iterator_begin( enum do_for_each_ref_flags flags) { struct ref_iterator *iter; + struct strvec normalized_exclude_patterns = STRVEC_INIT; + + if (exclude_patterns) { + for (size_t i = 0; exclude_patterns[i]; i++) { + const char *pattern = exclude_patterns[i]; + size_t len = strlen(pattern); + if (!len) + continue; + + strvec_push(&normalized_exclude_patterns, pattern); + } + + exclude_patterns = normalized_exclude_patterns.v; + } if (!(flags & DO_FOR_EACH_INCLUDE_BROKEN)) { static int ref_paranoia = -1; @@ -1580,6 +1594,8 @@ struct ref_iterator *refs_ref_iterator_begin( if (trim) iter = prefix_ref_iterator_begin(iter, "", trim); + strvec_clear(&normalized_exclude_patterns); + return iter; } diff --git a/t/t1419-exclude-refs.sh b/t/t1419-exclude-refs.sh index 13595744190b54..b5e01e9f45b754 100755 --- a/t/t1419-exclude-refs.sh +++ b/t/t1419-exclude-refs.sh @@ -125,4 +125,14 @@ test_expect_success 'meta-characters are discarded' ' assert_no_jumps perf ' +test_expect_success 'empty string exclude pattern is ignored' ' + git update-ref refs/heads/loose $(git rev-parse refs/heads/foo/1) && + + for_each_ref__exclude refs/heads "" >actual 2>perf && + for_each_ref >expect && + + test_cmp expect actual && + assert_no_jumps perf +' + test_done From 10e8a9352bcc7b21a64e0b321f302cf900ac8c77 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 6 Mar 2025 10:34:53 -0500 Subject: [PATCH 048/775] refs.c: stop matching non-directory prefixes in exclude patterns In the packed-refs backend, our implementation of '--exclude' (dating back to 59c35fac54 (refs/packed-backend.c: implement jump lists to avoid excluded pattern(s), 2023-07-10)) considers, for 
example: $ git for-each-ref --exclude=refs/heads/ba to exclude "refs/heads/bar", "refs/heads/baz", and so on. The files backend, which does not implement '--exclude' (and relies on the caller to cull out results that don't match) naturally will enumerate "refs/heads/bar" and so on. So in the above example, 'for-each-ref' will try and see if "refs/heads/ba" matches "refs/heads/bar" (since the files backend simply enumerated every loose reference), and, realizing that it does not match, output the reference as expected. (A caller that did want to exclude "refs/heads/bar" and "refs/heads/baz" might instead run "git for-each-ref --exclude='refs/heads/ba*'"). This can lead to strange behavior, like seeing a different set of references advertised via 'upload-pack' depending on what set of references were loose versus packed. So there is a subtle bug with '--exclude' which is that in the packed-refs backend we will consider "refs/heads/bar" to be a pattern match against "refs/heads/ba" when we shouldn't. Likewise, the reftable backend (which in this case is bug-compatible with the packed backend) exhibits the same broken behavior. There are a few ways to fix this. One is to tighten the rules in cmp_record_to_refname(), which is used to determine the start/end-points of the jump list used by the packed backend. In this new "strict" mode, the comparison function would handle the case where we've reached the end of the pattern by introducing a new check like so: while (1) { if (*r1 == '\n') return *r2 ? -1 : 0; if (!*r2) if (strict && *r1 != '/') /* <- here */ return 1; return start ? 1 : -1; if (*r1 != *r2) return (unsigned char)*r1 < (unsigned char)*r2 ? -1 : +1; r1++; r2++; } (eliding out the rest of cmp_record_to_refname()). Equivalently, we could teach refs/packed-backend::populate_excluded_jump_list() to append a trailing '/' if one does not already exist, forcing an exclude pattern like "refs/heads/ba" to only match "refs/heads/ba/abc" and so forth. 
But since the same problem exists in reftable, we can fix both at once by performing this pre-processing step one layer up in refs.c at the common entrypoint for the two, which is 'refs_ref_iterator_begin()'. Since that solution is both the simplest and only requires modification in one spot, let's normalize exclude patterns so that they end with a trailing slash. This causes us to unify the behavior between all three backends. There is some minor test fallout in the "overlapping excluded regions" test, which happens to use 'refs/ba' as an exclude pattern, and expects references under the "refs/heads/bar/*" and "refs/heads/baz/*" hierarchies to be excluded from the results. But that test fallout is expected, because the test was codifying the buggy behavior to begin with, and should have never been written that way. Split that into its own test (since the range is no longer overlapping under the stricter interpretation of --exclude patterns presented here). Create a new test which does have overlapping regions by using a refs/heads/bar/4/... hierarchy and excluding both "refs/heads/bar" and "refs/heads/bar/4". 
Reported-by: SURA Helped-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- refs.c | 6 +++++- t/t1419-exclude-refs.sh | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/refs.c b/refs.c index fa943d7d64b398..e4fda6b4bbc908 100644 --- a/refs.c +++ b/refs.c @@ -1569,7 +1569,11 @@ struct ref_iterator *refs_ref_iterator_begin( if (!len) continue; - strvec_push(&normalized_exclude_patterns, pattern); + if (pattern[len - 1] == '/') + strvec_push(&normalized_exclude_patterns, pattern); + else + strvec_pushf(&normalized_exclude_patterns, "%s/", + pattern); } exclude_patterns = normalized_exclude_patterns.v; diff --git a/t/t1419-exclude-refs.sh b/t/t1419-exclude-refs.sh index b5e01e9f45b754..11d6dfdb251f50 100755 --- a/t/t1419-exclude-refs.sh +++ b/t/t1419-exclude-refs.sh @@ -46,6 +46,10 @@ test_expect_success 'setup' ' echo "create refs/heads/$name/$i $base" || return 1 done || return 1 done >in && + for i in 5 6 7 + do + echo "create refs/heads/bar/4/$i $base" || return 1 + done >>in && echo "delete refs/heads/main" >>in && git update-ref --stdin actual 2>perf && - for_each_ref refs/heads/foo refs/heads/quux >expect && + for_each_ref refs/heads/bar refs/heads/foo refs/heads/quux >expect && + + test_cmp expect actual && + assert_jumps 1 perf +' + +test_expect_success 'overlapping excluded regions' ' + for_each_ref__exclude refs/heads refs/heads/bar refs/heads/bar/4 >actual 2>perf && + for_each_ref refs/heads/baz refs/heads/foo refs/heads/quux >expect && test_cmp expect actual && assert_jumps 1 perf From 98a1a00d53018c7e664644d886466a820aa5e6d7 Mon Sep 17 00:00:00 2001 From: Dmitry Goncharov Date: Thu, 6 Mar 2025 15:30:26 +0000 Subject: [PATCH 049/775] t6423: add a testcase causing a failed assertion in process_renames If one side of history renames a directory A/ -> B/, and the other side of history adds new files to A/, then directory rename detection notices and moves or suggests moving those new files to B/. 
A similar thing is done for paths renamed into A/, causing them to be transitively renamed into B/. But, if the file originally came from B/, then this can end up causing a file to be renamed back to itself. merge-ort crashes under this special case, due to a slightly overzealous assertion: git: merge-ort.c:3051: process_renames: Assertion `source_deleted || oldinfo->filemask & old_sidemask' failed. Aborted (core dumped) Add a testcase demonstrating this. Signed-off-by: Dmitry Goncharov [en: Instead of adding a new testsuite, place it near similar tests in t6423, adjusting to match the style of those tests. Tweak the commit message to not repeat the entire testcase, but just describe the bug. Also update the line number in the error message.] Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- t/t6423-merge-rename-directories.sh | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index 94080c65d12b03..7b8d79f56c19d7 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -5363,6 +5363,47 @@ test_expect_merge_algorithm failure success '12m: Change parent of renamed-dir t ) ' +test_setup_12n () { + git init 12n && + ( + cd 12n && + + mkdir tools && + echo hello >tools/hello && + git add tools/hello && + git commit -m "O" && + + git branch O && + git branch A && + git branch B && + + git switch A && + echo world >world && + git add world && + git commit -q world -m 'Add world' && + + git mv world tools/world && + git commit -m "Move world into tools/" && + + git switch B && + git mv tools/hello hello && + git commit -m "Move hello from tools/ to toplevel" + ) +} + +test_expect_failure '12n: Directory rename transitively makes rename back to self' ' + test_setup_12n && + ( + cd 12n && + + git checkout -q B^0 && + + test_must_fail git cherry-pick A^0 >out && + grep "CONFLICT (file location).*should perhaps be moved" out 
+ ) +' + + ########################################################################### # SECTION 13: Checking informational and conflict messages # From 3adba40858036a5a44f550aaab5287ad135f5f87 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 6 Mar 2025 15:30:27 +0000 Subject: [PATCH 050/775] merge-ort: fix slightly overzealous assertion for rename-to-self merge-ort has a number of sanity checks on the file it is processing in process_renames(). One of these sanity checks was slightly overzealous because it indirectly assumed that a renamed file always ended up at a different path than where it started. That is normally an entirely fair assumption, but directory rename detection can make things interesting. As a quick refresher, if one side of history renames directory A/ -> B/, and the other side of history adds new files to A/, then directory rename detection notices and suggests moving those new files to B/. A similar thing is done for paths renamed into A/, causing them to be transitively renamed into B/. But, if the file originally came from B/, then this can end up causing a file to be renamed back to itself. It turns out the rest of the code following this assertion handled the case fine; the assertion was just an extra sanity check, not a rigid precondition. Therefore, simply adjust the assertion to pass under this special case as well. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 3 ++- t/t6423-merge-rename-directories.sh | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 46e78c3ffa68e6..b0ff2236af03d9 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -3048,7 +3048,8 @@ static int process_renames(struct merge_options *opt, } } - assert(source_deleted || oldinfo->filemask & old_sidemask); + assert(source_deleted || oldinfo->filemask & old_sidemask || + !strcmp(pair->one->path, pair->two->path)); /* Need to check for special types of rename conflicts... 
*/ if (collision && !source_deleted) { diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index 7b8d79f56c19d7..79d889b94cbcf3 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -5391,7 +5391,7 @@ test_setup_12n () { ) } -test_expect_failure '12n: Directory rename transitively makes rename back to self' ' +test_expect_success '12n: Directory rename transitively makes rename back to self' ' test_setup_12n && ( cd 12n && From f29f1990b5605f8dbed65324d8e92b67d4dd1713 Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 8 Mar 2025 05:05:00 +0530 Subject: [PATCH 051/775] config: teach repo_config to allow `repo` to be NULL The `repo` value can be NULL if a builtin command is run outside any repository. The current implementation of `repo_config()` will fail if `repo` is NULL. If the `repo` is NULL the `repo_config()` can ignore the repository configuration but it should read the other configuration sources like the system-wide configuration instead of failing. Teach the `repo_config()` to allow `repo` to be NULL by calling the `read_very_early_config()` which reads config but only enumerates system and global settings. This will be useful in the following commits. 
Suggested-by: Junio C Hamano Mentored-by: Christian Couder Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- config.c | 4 ++++ config.h | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/config.c b/config.c index dfd03b9421cdf9..f13a2870f16863 100644 --- a/config.c +++ b/config.c @@ -2521,6 +2521,10 @@ void repo_config_clear(struct repository *repo) void repo_config(struct repository *repo, config_fn_t fn, void *data) { + if (!repo) { + read_very_early_config(fn, data); + return; + } git_config_check_init(repo); configset_iter(repo->config, fn, data); } diff --git a/config.h b/config.h index 5c730c4f899160..29a027748375f1 100644 --- a/config.h +++ b/config.h @@ -219,6 +219,15 @@ void read_very_early_config(config_fn_t cb, void *data); * repo-specific one; by overwriting, the higher-priority repo-specific * value is left at the end). * + * In cases where the repository variable is NULL, repo_config() will + * skip the per-repository config but retain system and global configs + * by calling read_very_early_config() which also ignores one-time + * overrides like "git -c var=val". This is to support handling "git foo -h" + * (which lets git.c:run_builtin() to pass NULL and have the cmd_foo() + * call repo_config() before calling parse_options() to notice "-h", give + * help and exit) for a command that ordinarily require a repository + * so this limitation may be OK (but if needed you are welcome to fix it). + * * Unlike git_config_from_file(), this function respects includes. */ void repo_config(struct repository *r, config_fn_t fn, void *); From 43a839197795a7f0582efd66bc249dac89f86b68 Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 8 Mar 2025 05:05:01 +0530 Subject: [PATCH 052/775] builtin/verify-tag: stop using `the_repository` Remove the_repository global variable in favor of the repository argument that gets passed in "builtin/verify-tag.c". 
When `-h` is passed to the command outside a Git repository, the `run_builtin()` will call the `cmd_verify_tag()` function with `repo` set to NULL and then early in the function, `parse_options()` call will give the options help and exit. Mentored-by: Christian Couder Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- builtin/verify-tag.c | 7 +++---- t/t7030-verify-tag.sh | 7 +++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/builtin/verify-tag.c b/builtin/verify-tag.c index f6b97048a57d37..ed1c40338fd88c 100644 --- a/builtin/verify-tag.c +++ b/builtin/verify-tag.c @@ -5,7 +5,6 @@ * * Based on git-verify-tag.sh */ -#define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "config.h" #include "gettext.h" @@ -23,7 +22,7 @@ static const char * const verify_tag_usage[] = { int cmd_verify_tag(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { int i = 1, verbose = 0, had_error = 0; unsigned flags = 0; @@ -35,7 +34,7 @@ int cmd_verify_tag(int argc, OPT_END() }; - git_config(git_default_config, NULL); + repo_config(repo, git_default_config, NULL); argc = parse_options(argc, argv, prefix, verify_tag_options, verify_tag_usage, PARSE_OPT_KEEP_ARGV0); @@ -56,7 +55,7 @@ int cmd_verify_tag(int argc, struct object_id oid; const char *name = argv[i++]; - if (repo_get_oid(the_repository, name, &oid)) { + if (repo_get_oid(repo, name, &oid)) { had_error = !!error("tag '%s' not found.", name); continue; } diff --git a/t/t7030-verify-tag.sh b/t/t7030-verify-tag.sh index 6f526c37c2776e..2c147072c17f4c 100755 --- a/t/t7030-verify-tag.sh +++ b/t/t7030-verify-tag.sh @@ -7,6 +7,13 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh . 
"$TEST_DIRECTORY/lib-gpg.sh" +test_expect_success GPG 'verify-tag does not crash with -h' ' + test_expect_code 129 git verify-tag -h >usage && + test_grep "[Uu]sage: git verify-tag " usage && + test_expect_code 129 nongit git verify-tag -h >usage && + test_grep "[Uu]sage: git verify-tag " usage +' + test_expect_success GPG 'create signed tags' ' echo 1 >file && git add file && test_tick && git commit -m initial && From db58d5a351031ec51817c1a6566ec5672ba17138 Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 8 Mar 2025 05:05:02 +0530 Subject: [PATCH 053/775] builtin/verify-commit: stop using `the_repository` Remove the_repository global variable in favor of the repository argument that gets passed in "builtin/verify-commit.c". When `-h` is passed to the command outside a Git repository, the `run_builtin()` will call the `cmd_verify_commit()` function with `repo` set to NULL and then early in the function, `parse_options()` call will give the options help and exit. Pass the repository available in the calling context to `verify_commit()` to remove it's dependency on the global `the_repository` variable. 
Mentored-by: Christian Couder Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- builtin/verify-commit.c | 13 ++++++------- t/t7510-signed-commit.sh | 7 +++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/builtin/verify-commit.c b/builtin/verify-commit.c index 779b7988ca9784..5f749a30daf015 100644 --- a/builtin/verify-commit.c +++ b/builtin/verify-commit.c @@ -5,7 +5,6 @@ * * Based on git-verify-tag */ -#define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "config.h" #include "gettext.h" @@ -33,15 +32,15 @@ static int run_gpg_verify(struct commit *commit, unsigned flags) return ret; } -static int verify_commit(const char *name, unsigned flags) +static int verify_commit(struct repository *repo, const char *name, unsigned flags) { struct object_id oid; struct object *obj; - if (repo_get_oid(the_repository, name, &oid)) + if (repo_get_oid(repo, name, &oid)) return error("commit '%s' not found.", name); - obj = parse_object(the_repository, &oid); + obj = parse_object(repo, &oid); if (!obj) return error("%s: unable to read file.", name); if (obj->type != OBJ_COMMIT) @@ -54,7 +53,7 @@ static int verify_commit(const char *name, unsigned flags) int cmd_verify_commit(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { int i = 1, verbose = 0, had_error = 0; unsigned flags = 0; @@ -64,7 +63,7 @@ int cmd_verify_commit(int argc, OPT_END() }; - git_config(git_default_config, NULL); + repo_config(repo, git_default_config, NULL); argc = parse_options(argc, argv, prefix, verify_commit_options, verify_commit_usage, PARSE_OPT_KEEP_ARGV0); @@ -78,7 +77,7 @@ int cmd_verify_commit(int argc, * was received in the process of writing the gpg input: */ signal(SIGPIPE, SIG_IGN); while (i < argc) - if (verify_commit(argv[i++], flags)) + if (verify_commit(repo, argv[i++], flags)) had_error = 1; return had_error; } diff --git a/t/t7510-signed-commit.sh b/t/t7510-signed-commit.sh index 
0d2dd29fe6a12c..39677e859ab311 100755 --- a/t/t7510-signed-commit.sh +++ b/t/t7510-signed-commit.sh @@ -8,6 +8,13 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME GNUPGHOME_NOT_USED=$GNUPGHOME . "$TEST_DIRECTORY/lib-gpg.sh" +test_expect_success GPG 'verify-commit does not crash with -h' ' + test_expect_code 129 git verify-commit -h >usage && + test_grep "[Uu]sage: git verify-commit " usage && + test_expect_code 129 nongit git verify-commit -h >usage && + test_grep "[Uu]sage: git verify-commit " usage +' + test_expect_success GPG 'create signed commits' ' test_oid_cache <<-\EOF && header sha1:gpgsig From 1c14b1aedec6e9307e0c73276b8c1c283413ea41 Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 8 Mar 2025 05:05:03 +0530 Subject: [PATCH 054/775] builtin/send-pack: stop using `the_repository` Remove the_repository global variable in favor of the repository argument that gets passed in "builtin/send-pack.c". When `-h` is passed to the command outside a Git repository, the `run_builtin()` will call the `cmd_send_pack()` function with `repo` set to NULL and then early in the function, `parse_options()` call will give the options help and exit. 
Mentored-by: Christian Couder Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- builtin/send-pack.c | 7 +++---- t/t5400-send-pack.sh | 7 +++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/builtin/send-pack.c b/builtin/send-pack.c index 8d461008e2e860..c6e0e9d0518634 100644 --- a/builtin/send-pack.c +++ b/builtin/send-pack.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "config.h" #include "hex.h" @@ -151,7 +150,7 @@ static int send_pack_config(const char *k, const char *v, int cmd_send_pack(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct refspec rs = REFSPEC_INIT_PUSH; const char *remote_name = NULL; @@ -212,7 +211,7 @@ int cmd_send_pack(int argc, OPT_END() }; - git_config(send_pack_config, NULL); + repo_config(repo, send_pack_config, NULL); argc = parse_options(argc, argv, prefix, options, send_pack_usage, 0); if (argc > 0) { dest = argv[0]; @@ -317,7 +316,7 @@ int cmd_send_pack(int argc, set_ref_status_for_push(remote_refs, args.send_mirror, args.force_update); - ret = send_pack(the_repository, &args, fd, conn, remote_refs, &extra_have); + ret = send_pack(repo, &args, fd, conn, remote_refs, &extra_have); if (helper_status) print_helper_status(remote_refs); diff --git a/t/t5400-send-pack.sh b/t/t5400-send-pack.sh index 3f81f16e1335ce..8f018d2f235cf9 100755 --- a/t/t5400-send-pack.sh +++ b/t/t5400-send-pack.sh @@ -55,6 +55,13 @@ test_expect_success setup ' echo Rebase && git log' +test_expect_success 'send-pack does not crash with -h' ' + test_expect_code 129 git send-pack -h >usage && + test_grep "[Uu]sage: git send-pack " usage && + test_expect_code 129 nongit git send-pack -h >usage && + test_grep "[Uu]sage: git send-pack " usage +' + test_expect_success 'pack the source repository' ' git repack -a -d && git prune From 72fe8bfac832a1f1b9b7cdc4efa793c48e13933e Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 8 Mar 
2025 05:05:04 +0530 Subject: [PATCH 055/775] builtin/pack-refs: stop using `the_repository` Remove the_repository global variable in favor of the repository argument that gets passed in "builtin/pack-refs.c". When `-h` is passed to the command outside a Git repository, the `run_builtin()` will call the `cmd_pack_refs()` function with `repo` set to NULL and then early in the function, `parse_options()` call will give the options help and exit. Mentored-by: Christian Couder Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- builtin/pack-refs.c | 8 +++----- t/t0610-reftable-basics.sh | 7 +++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/builtin/pack-refs.c b/builtin/pack-refs.c index 4fdd68880e0c6d..e47bae1c803bc8 100644 --- a/builtin/pack-refs.c +++ b/builtin/pack-refs.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "builtin.h" #include "config.h" #include "gettext.h" @@ -15,7 +13,7 @@ static char const * const pack_refs_usage[] = { int cmd_pack_refs(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct ref_exclusions excludes = REF_EXCLUSIONS_INIT; struct string_list included_refs = STRING_LIST_INIT_NODUP; @@ -39,7 +37,7 @@ int cmd_pack_refs(int argc, N_("references to exclude")), OPT_END(), }; - git_config(git_default_config, NULL); + repo_config(repo, git_default_config, NULL); if (parse_options(argc, argv, prefix, opts, pack_refs_usage, 0)) usage_with_options(pack_refs_usage, opts); @@ -52,7 +50,7 @@ int cmd_pack_refs(int argc, if (!pack_refs_opts.includes->nr) string_list_append(pack_refs_opts.includes, "refs/tags/*"); - ret = refs_pack_refs(get_main_ref_store(the_repository), &pack_refs_opts); + ret = refs_pack_refs(get_main_ref_store(repo), &pack_refs_opts); clear_ref_exclusions(&excludes); string_list_clear(&included_refs, 0); diff --git a/t/t0610-reftable-basics.sh b/t/t0610-reftable-basics.sh index 4618ffc108edd8..002a75dee85f55 100755 --- 
a/t/t0610-reftable-basics.sh +++ b/t/t0610-reftable-basics.sh @@ -14,6 +14,13 @@ export GIT_TEST_DEFAULT_REF_FORMAT INVALID_OID=$(test_oid 001) +test_expect_success 'pack-refs does not crash with -h' ' + test_expect_code 129 git pack-refs -h >usage && + test_grep "[Uu]sage: git pack-refs " usage && + test_expect_code 129 nongit git pack-refs -h >usage && + test_grep "[Uu]sage: git pack-refs " usage +' + test_expect_success 'init: creates basic reftable structures' ' test_when_finished "rm -rf repo" && git init repo && From d9c5cfb18f4dff87db7d28846bd754c009f6043d Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 8 Mar 2025 05:05:05 +0530 Subject: [PATCH 056/775] builtin/ls-files: stop using `the_repository` Remove the_repository global variable in favor of the repository argument that gets passed in "builtin/ls-files.c". When `-h` is passed to the command outside a Git repository, the `run_builtin()` will call the `cmd_ls_files()` function with `repo` set to NULL and then early in the function, `show_usage_with_options_if_asked()` call will give the options help and exit. Pass the repository available in the calling context to both `expand_objectsize()` and `show_ru_info()` to remove their dependency on the global `the_repository` variable. 
Mentored-by: Christian Couder Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 32 ++++++++++++++++---------------- t/t3004-ls-files-basic.sh | 7 +++++++ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index a4431429b7de42..70a377e9c0e29c 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -6,7 +6,6 @@ * Copyright (C) Linus Torvalds, 2005 */ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" @@ -245,12 +244,13 @@ static void show_submodule(struct repository *superproject, repo_clear(&subrepo); } -static void expand_objectsize(struct strbuf *line, const struct object_id *oid, +static void expand_objectsize(struct repository *repo, struct strbuf *line, + const struct object_id *oid, const enum object_type type, unsigned int padded) { if (type == OBJ_BLOB) { unsigned long size; - if (oid_object_info(the_repository, oid, &size) < 0) + if (oid_object_info(repo, oid, &size) < 0) die(_("could not get object info about '%s'"), oid_to_hex(oid)); if (padded) @@ -283,10 +283,10 @@ static void show_ce_fmt(struct repository *repo, const struct cache_entry *ce, else if (skip_prefix(format, "(objecttype)", &format)) strbuf_addstr(&sb, type_name(object_type(ce->ce_mode))); else if (skip_prefix(format, "(objectsize:padded)", &format)) - expand_objectsize(&sb, &ce->oid, + expand_objectsize(repo, &sb, &ce->oid, object_type(ce->ce_mode), 1); else if (skip_prefix(format, "(objectsize)", &format)) - expand_objectsize(&sb, &ce->oid, + expand_objectsize(repo, &sb, &ce->oid, object_type(ce->ce_mode), 0); else if (skip_prefix(format, "(stage)", &format)) strbuf_addf(&sb, "%d", ce_stage(ce)); @@ -348,7 +348,7 @@ static void show_ce(struct repository *repo, struct dir_struct *dir, } } -static void show_ru_info(struct index_state *istate) +static void show_ru_info(struct repository *repo, struct index_state *istate) { struct string_list_item *item; @@ 
-370,7 +370,7 @@ static void show_ru_info(struct index_state *istate) if (!ui->mode[i]) continue; printf("%s%06o %s %d\t", tag_resolve_undo, ui->mode[i], - repo_find_unique_abbrev(the_repository, &ui->oid[i], abbrev), + repo_find_unique_abbrev(repo, &ui->oid[i], abbrev), i + 1); write_name(path); } @@ -567,7 +567,7 @@ static int option_parse_exclude_standard(const struct option *opt, int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix, - struct repository *repo UNUSED) + struct repository *repo) { int require_work_tree = 0, show_tag = 0, i; char *max_prefix; @@ -647,15 +647,15 @@ int cmd_ls_files(int argc, show_usage_with_options_if_asked(argc, argv, ls_files_usage, builtin_ls_files_options); - prepare_repo_settings(the_repository); - the_repository->settings.command_requires_full_index = 0; + prepare_repo_settings(repo); + repo->settings.command_requires_full_index = 0; prefix = cmd_prefix; if (prefix) prefix_len = strlen(prefix); - git_config(git_default_config, NULL); + repo_config(repo, git_default_config, NULL); - if (repo_read_index(the_repository) < 0) + if (repo_read_index(repo) < 0) die("index file corrupt"); argc = parse_options(argc, argv, prefix, builtin_ls_files_options, @@ -724,7 +724,7 @@ int cmd_ls_files(int argc, max_prefix = common_prefix(&pathspec); max_prefix_len = get_common_prefix_len(max_prefix); - prune_index(the_repository->index, max_prefix, max_prefix_len); + prune_index(repo->index, max_prefix, max_prefix_len); /* Treat unmatching pathspec elements as errors */ if (pathspec.nr && error_unmatch) @@ -748,13 +748,13 @@ int cmd_ls_files(int argc, */ if (show_stage || show_unmerged) die(_("options '%s' and '%s' cannot be used together"), "ls-files --with-tree", "-s/-u"); - overlay_tree_on_index(the_repository->index, with_tree, max_prefix); + overlay_tree_on_index(repo->index, with_tree, max_prefix); } - show_files(the_repository, &dir); + show_files(repo, &dir); if (show_resolve_undo) - show_ru_info(the_repository->index); + 
show_ru_info(repo, repo->index); if (ps_matched && report_path_error(ps_matched, &pathspec)) { fprintf(stderr, "Did you forget to 'git add'?\n"); diff --git a/t/t3004-ls-files-basic.sh b/t/t3004-ls-files-basic.sh index a1078f8701dbfb..4034a5a59f58d8 100755 --- a/t/t3004-ls-files-basic.sh +++ b/t/t3004-ls-files-basic.sh @@ -34,6 +34,13 @@ test_expect_success 'ls-files -h in corrupt repository' ' test_grep "[Uu]sage: git ls-files " broken/usage ' +test_expect_success 'ls-files does not crash with -h' ' + test_expect_code 129 git ls-files -h >usage && + test_grep "[Uu]sage: git ls-files " usage && + test_expect_code 129 nongit git ls-files -h >usage && + test_grep "[Uu]sage: git ls-files " usage +' + test_expect_success SYMLINKS 'ls-files with absolute paths to symlinks' ' mkdir subs && ln -s nosuch link && From d9dce89192504c63787b98961ee514937af88e61 Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 8 Mar 2025 05:05:06 +0530 Subject: [PATCH 057/775] builtin/for-each-ref: stop using `the_repository` Remove the_repository global variable in favor of the repository argument that gets passed in "builtin/for-each-ref.c". When `-h` is passed to the command outside a Git repository, the `run_builtin()` will call the `cmd_for_each_ref()` function with `repo` set to NULL and then early in the function, `parse_options()` call will give the options help and exit. 
Mentored-by: Christian Couder Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- builtin/for-each-ref.c | 5 ++--- t/t6300-for-each-ref.sh | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c index 8085ebd8fe97b5..3d2207ec7733b2 100644 --- a/builtin/for-each-ref.c +++ b/builtin/for-each-ref.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "commit.h" #include "config.h" @@ -20,7 +19,7 @@ static char const * const for_each_ref_usage[] = { int cmd_for_each_ref(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct ref_sorting *sorting; struct string_list sorting_options = STRING_LIST_INIT_DUP; @@ -63,7 +62,7 @@ int cmd_for_each_ref(int argc, format.format = "%(objectname) %(objecttype)\t%(refname)"; - git_config(git_default_config, NULL); + repo_config(repo, git_default_config, NULL); /* Set default (refname) sorting */ string_list_append(&sorting_options, "refname"); diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh index a5c77943854738..9b4f4306c4a905 100755 --- a/t/t6300-for-each-ref.sh +++ b/t/t6300-for-each-ref.sh @@ -292,6 +292,13 @@ test_expect_success 'Check invalid atoms names are errors' ' test_must_fail git for-each-ref --format="%(INVALID)" refs/heads ' +test_expect_success 'for-each-ref does not crash with -h' ' + test_expect_code 129 git for-each-ref -h >usage && + test_grep "[Uu]sage: git for-each-ref " usage && + test_expect_code 129 nongit git for-each-ref -h >usage && + test_grep "[Uu]sage: git for-each-ref " usage +' + test_expect_success 'Check format specifiers are ignored in naming date atoms' ' git for-each-ref --format="%(authordate)" refs/heads && git for-each-ref --format="%(authordate:default) %(authordate)" refs/heads && From 09cbf1597edde2b0200ecbf469c78689c4dcb12f Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 8 Mar 2025 05:05:07 +0530 
Subject: [PATCH 058/775] builtin/checkout-index: stop using `the_repository` Remove the_repository global variable in favor of the repository argument that gets passed in "builtin/checkout-index.c". When `-h` is passed to the command outside a Git repository, the `run_builtin()` will call the `cmd_checkout_index()` function with `repo` set to NULL and then early in the function, `show_usage_with_options_if_asked()` call will give the options help and exit. Pass an instance of "struct index_state" available in the calling context to both `checkout_all()` and `checkout_file()` to remove their dependency on the global `the_repository` variable. Mentored-by: Christian Couder Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- builtin/checkout-index.c | 43 ++++++++++++++++----------------- t/t2006-checkout-index-basic.sh | 7 ++++++ 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index e30086c7d47189..7f74bc702fa739 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -5,7 +5,6 @@ * */ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" @@ -68,10 +67,10 @@ static void write_tempfile_record(const char *name, const char *prefix) } } -static int checkout_file(const char *name, const char *prefix) +static int checkout_file(struct index_state *index, const char *name, const char *prefix) { int namelen = strlen(name); - int pos = index_name_pos(the_repository->index, name, namelen); + int pos = index_name_pos(index, name, namelen); int has_same_name = 0; int is_file = 0; int is_skipped = 1; @@ -81,8 +80,8 @@ static int checkout_file(const char *name, const char *prefix) if (pos < 0) pos = -pos - 1; - while (pos <the_repository->index->cache_nr) { - struct cache_entry *ce =the_repository->index->cache[pos]; + while (pos < index->cache_nr) { + struct cache_entry *ce = index->cache[pos]; if (ce_namelen(ce) != namelen || memcmp(ce->name, name, namelen))
break; @@ -137,13 +136,13 @@ static int checkout_file(const char *name, const char *prefix) return -1; } -static int checkout_all(const char *prefix, int prefix_length) +static int checkout_all(struct index_state *index, const char *prefix, int prefix_length) { int i, errs = 0; struct cache_entry *last_ce = NULL; - for (i = 0; i < the_repository->index->cache_nr ; i++) { - struct cache_entry *ce = the_repository->index->cache[i]; + for (i = 0; i < index->cache_nr ; i++) { + struct cache_entry *ce = index->cache[i]; if (S_ISSPARSEDIR(ce->ce_mode)) { if (!ce_skip_worktree(ce)) @@ -156,8 +155,8 @@ static int checkout_all(const char *prefix, int prefix_length) * first entry inside the expanded sparse directory). */ if (ignore_skip_worktree) { - ensure_full_index(the_repository->index); - ce = the_repository->index->cache[i]; + ensure_full_index(index); + ce = index->cache[i]; } } @@ -213,7 +212,7 @@ static int option_parse_stage(const struct option *opt, int cmd_checkout_index(int argc, const char **argv, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { int i; struct lock_file lock_file = LOCK_INIT; @@ -253,19 +252,19 @@ int cmd_checkout_index(int argc, show_usage_with_options_if_asked(argc, argv, builtin_checkout_index_usage, builtin_checkout_index_options); - git_config(git_default_config, NULL); + repo_config(repo, git_default_config, NULL); prefix_length = prefix ? 
strlen(prefix) : 0; - prepare_repo_settings(the_repository); - the_repository->settings.command_requires_full_index = 0; + prepare_repo_settings(repo); + repo->settings.command_requires_full_index = 0; - if (repo_read_index(the_repository) < 0) { + if (repo_read_index(repo) < 0) { die("invalid cache"); } argc = parse_options(argc, argv, prefix, builtin_checkout_index_options, builtin_checkout_index_usage, 0); - state.istate = the_repository->index; + state.istate = repo->index; state.force = force; state.quiet = quiet; state.not_new = not_new; @@ -285,8 +284,8 @@ int cmd_checkout_index(int argc, */ if (index_opt && !state.base_dir_len && !to_tempfile) { state.refresh_cache = 1; - state.istate = the_repository->index; - repo_hold_locked_index(the_repository, &lock_file, + state.istate = repo->index; + repo_hold_locked_index(repo, &lock_file, LOCK_DIE_ON_ERROR); } @@ -304,7 +303,7 @@ int cmd_checkout_index(int argc, if (read_from_stdin) die("git checkout-index: don't mix '--stdin' and explicit filenames"); p = prefix_path(prefix, prefix_length, arg); - err |= checkout_file(p, prefix); + err |= checkout_file(repo->index, p, prefix); free(p); } @@ -326,7 +325,7 @@ int cmd_checkout_index(int argc, strbuf_swap(&buf, &unquoted); } p = prefix_path(prefix, prefix_length, buf.buf); - err |= checkout_file(p, prefix); + err |= checkout_file(repo->index, p, prefix); free(p); } strbuf_release(&unquoted); @@ -334,7 +333,7 @@ int cmd_checkout_index(int argc, } if (all) - err |= checkout_all(prefix, prefix_length); + err |= checkout_all(repo->index, prefix, prefix_length); if (pc_workers > 1) err |= run_parallel_checkout(&state, pc_workers, pc_threshold, @@ -344,7 +343,7 @@ int cmd_checkout_index(int argc, return 1; if (is_lock_file_locked(&lock_file) && - write_locked_index(the_repository->index, &lock_file, COMMIT_LOCK)) + write_locked_index(repo->index, &lock_file, COMMIT_LOCK)) die("Unable to write new index file"); return 0; } diff --git a/t/t2006-checkout-index-basic.sh 
b/t/t2006-checkout-index-basic.sh index bac231b167c2c6..fedd2cc097f349 100755 --- a/t/t2006-checkout-index-basic.sh +++ b/t/t2006-checkout-index-basic.sh @@ -21,6 +21,13 @@ test_expect_success 'checkout-index -h in broken repository' ' test_grep "[Uu]sage" broken/usage ' +test_expect_success 'checkout-index does not crash with -h' ' + test_expect_code 129 git checkout-index -h >usage && + test_grep "[Uu]sage: git checkout-index " usage && + test_expect_code 129 nongit git checkout-index -h >usage && + test_grep "[Uu]sage: git checkout-index " usage +' + test_expect_success 'checkout-index reports errors (cmdline)' ' test_must_fail git checkout-index -- does-not-exist 2>stderr && test_grep not.in.the.cache stderr From 227c4f33a0351d12b04660a9f03ca96dbab1310a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Sun, 9 Mar 2025 19:45:11 +0000 Subject: [PATCH 059/775] doc: add a blank line around block delimiters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation is using the historical mode for titles, which is a setext-style (i.e., two-line) section title. The issue with this mode is that starting block delimiters (e.g., `----`) can be confused with a section title when they are exactly the same length as the preceding line. In the original documentation, this is taken care of for English by the writer, but it is not the case for translations where these delimiters are hidden. A translator can generate a line that is exactly the same length as the following block delimiter, which leads to this line being considered as a title. To safeguard against this issue, add a blank line before and after block delimiters where block is at root level, else add a "+" line before block delimiters to link it to the preceding paragraph. 
Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/MyFirstContribution.adoc | 1 + Documentation/MyFirstObjectWalk.adoc | 2 ++ Documentation/ToolsForGit.adoc | 1 + Documentation/git-bisect.adoc | 1 + Documentation/git-cat-file.adoc | 4 ++-- Documentation/git-check-attr.adoc | 6 ++++++ Documentation/git-column.adoc | 3 +++ Documentation/git-cvsserver.adoc | 4 ++++ Documentation/git-for-each-ref.adoc | 2 ++ Documentation/git-p4.adoc | 14 ++++++++++++++ Documentation/git-rebase.adoc | 3 +++ Documentation/gitattributes.adoc | 24 ++++++++++++++++-------- Documentation/gitcli.adoc | 2 +- Documentation/gitprotocol-common.adoc | 2 ++ Documentation/gitweb.adoc | 11 +++++++++++ Documentation/gitweb.conf.adoc | 2 ++ Documentation/rev-list-options.adoc | 2 ++ 17 files changed, 73 insertions(+), 11 deletions(-) diff --git a/Documentation/MyFirstContribution.adoc b/Documentation/MyFirstContribution.adoc index afcf4b46c11ab2..ca1d688c9ba5e1 100644 --- a/Documentation/MyFirstContribution.adoc +++ b/Documentation/MyFirstContribution.adoc @@ -367,6 +367,7 @@ But as we drill down, we can find that `status_init_config()` wraps a call to `git_config()`. Let's modify the code we wrote in the previous commit. Be sure to include the header to allow you to use `struct wt_status`: + ---- #include "wt-status.h" ---- diff --git a/Documentation/MyFirstObjectWalk.adoc b/Documentation/MyFirstObjectWalk.adoc index d6e9dfdbbe232e..bfe8f5f5611209 100644 --- a/Documentation/MyFirstObjectWalk.adoc +++ b/Documentation/MyFirstObjectWalk.adoc @@ -287,6 +287,7 @@ static void final_rev_info_setup(struct rev_info *rev) ==== Instead of using the shorthand `add_head_to_pending()`, you could do something like this: + ---- struct setup_revision_opt opt; @@ -295,6 +296,7 @@ something like this: opt.revarg_opt = REVARG_COMMITTISH; setup_revisions(argc, argv, rev, &opt); ---- + Using a `setup_revision_opt` gives you finer control over your walk's starting point. 
==== diff --git a/Documentation/ToolsForGit.adoc b/Documentation/ToolsForGit.adoc index ae7690b45d08b3..a842c1332797fb 100644 --- a/Documentation/ToolsForGit.adoc +++ b/Documentation/ToolsForGit.adoc @@ -34,6 +34,7 @@ This is adapted from Linux's suggestion in its CodingStyle document: - To follow the rules in CodingGuidelines, it's useful to put the following in GIT_CHECKOUT/.dir-locals.el, assuming you use cperl-mode: + ---- ;; note the first part is useful for C editing, too ((nil . ((indent-tabs-mode . t) diff --git a/Documentation/git-bisect.adoc b/Documentation/git-bisect.adoc index 82f944dc03dffc..58dbb74a15760c 100644 --- a/Documentation/git-bisect.adoc +++ b/Documentation/git-bisect.adoc @@ -495,6 +495,7 @@ $ git bisect old HEAD~10 # the tenth commit from now is marked as old ------------ + or: ++ ------------ $ git bisect start --term-old broken --term-new fixed $ git bisect fixed diff --git a/Documentation/git-cat-file.adoc b/Documentation/git-cat-file.adoc index d5890ae3686f6b..30359f5dbdb860 100644 --- a/Documentation/git-cat-file.adoc +++ b/Documentation/git-cat-file.adoc @@ -322,10 +322,10 @@ of `%(objectsize)` bytes), followed by a newline. 
For example, `--batch` without a custom format would produce: ------------- +----------- SP SP LF LF ------------- +----------- Whereas `--batch-check='%(objectname) %(objecttype)'` would produce: diff --git a/Documentation/git-check-attr.adoc b/Documentation/git-check-attr.adoc index cb5a6c8f335e12..503b6446574d18 100644 --- a/Documentation/git-check-attr.adoc +++ b/Documentation/git-check-attr.adoc @@ -76,6 +76,7 @@ EXAMPLES -------- In the examples, the following '.gitattributes' file is used: + --------------- *.java diff=java -crlf myAttr NoMyAttr.java !myAttr @@ -83,12 +84,14 @@ README caveat=unspecified --------------- * Listing a single attribute: ++ --------------- $ git check-attr diff org/example/MyClass.java org/example/MyClass.java: diff: java --------------- * Listing multiple attributes for a file: ++ --------------- $ git check-attr crlf diff myAttr -- org/example/MyClass.java org/example/MyClass.java: crlf: unset @@ -97,6 +100,7 @@ org/example/MyClass.java: myAttr: set --------------- * Listing all attributes for a file: ++ --------------- $ git check-attr --all -- org/example/MyClass.java org/example/MyClass.java: diff: java @@ -104,6 +108,7 @@ org/example/MyClass.java: myAttr: set --------------- * Listing an attribute for multiple files: ++ --------------- $ git check-attr myAttr -- org/example/MyClass.java org/example/NoMyAttr.java org/example/MyClass.java: myAttr: set @@ -111,6 +116,7 @@ org/example/NoMyAttr.java: myAttr: unspecified --------------- * Not all values are equally unambiguous: ++ --------------- $ git check-attr caveat README README: caveat: unspecified diff --git a/Documentation/git-column.adoc b/Documentation/git-column.adoc index 85fb87c94a4445..5a4f2b6fde9f27 100644 --- a/Documentation/git-column.adoc +++ b/Documentation/git-column.adoc @@ -50,6 +50,7 @@ EXAMPLES -------- Format data by columns: ++ ------------ $ seq 1 24 | git column --mode=column --padding=5 1 4 7 10 13 16 19 22 @@ -58,6 +59,7 @@ $ seq 1 24 | git column 
--mode=column --padding=5 ------------ Format data by rows: ++ ------------ $ seq 1 21 | git column --mode=row --padding=5 1 2 3 4 5 6 7 @@ -66,6 +68,7 @@ $ seq 1 21 | git column --mode=row --padding=5 ------------ List some tags in a table with unequal column widths: ++ ------------ $ git tag --list 'v2.4.*' --column=row,dense v2.4.0 v2.4.0-rc0 v2.4.0-rc1 v2.4.0-rc2 v2.4.0-rc3 diff --git a/Documentation/git-cvsserver.adoc b/Documentation/git-cvsserver.adoc index 4c475efeab976a..fe822f571d0bea 100644 --- a/Documentation/git-cvsserver.adoc +++ b/Documentation/git-cvsserver.adoc @@ -125,9 +125,11 @@ creation in your platform (e.g. mkpasswd in Linux, encrypt in OpenBSD or pwhash in NetBSD) and paste it in the right location. Then provide your password via the pserver method, for example: + ------ cvs -d:pserver:someuser:somepassword@server:/path/repo.git co ------ + No special setup is needed for SSH access, other than having Git tools in the PATH. If you have clients that do not accept the CVS_SERVER environment variable, you can rename 'git-cvsserver' to `cvs`. @@ -138,6 +140,7 @@ CVS_SERVER directly in CVSROOT like ------ cvs -d ":ext;CVS_SERVER=git cvsserver:user@server/path/repo.git" co ------ + This has the advantage that it will be saved in your 'CVS/Root' files and you don't need to worry about always setting the correct environment variable. SSH users restricted to 'git-shell' don't need to override the default @@ -168,6 +171,7 @@ All configuration variables can also be overridden for a specific method of access. Valid method names are "ext" (for SSH access) and "pserver". The following example configuration would disable pserver access while still allowing access over SSH. 
+ ------ [gitcvs] enabled=0 diff --git a/Documentation/git-for-each-ref.adoc b/Documentation/git-for-each-ref.adoc index ffb97e62c2d94e..5ef89fc0fe3c9d 100644 --- a/Documentation/git-for-each-ref.adoc +++ b/Documentation/git-for-each-ref.adoc @@ -441,6 +441,7 @@ Ref: %(*refname) A simple example showing the use of shell eval on the output, demonstrating the use of --shell. List the prefixes of all heads: + ------------ #!/bin/sh @@ -455,6 +456,7 @@ done A bit more elaborate report on tags, demonstrating that the format may be an entire script: + ------------ #!/bin/sh diff --git a/Documentation/git-p4.adoc b/Documentation/git-p4.adoc index de5ee6748e3588..f97b786bf98a21 100644 --- a/Documentation/git-p4.adoc +++ b/Documentation/git-p4.adoc @@ -80,6 +80,7 @@ This: To reproduce the entire p4 history in Git, use the '@all' modifier on the depot path: + ------------ $ git p4 clone //depot/path/project@all ------------ @@ -89,19 +90,23 @@ Sync ~~~~ As development continues in the p4 repository, those changes can be included in the Git repository using: + ------------ $ git p4 sync ------------ + This command finds new changes in p4 and imports them as Git commits. P4 repositories can be added to an existing Git repository using 'git p4 sync' too: + ------------ $ mkdir repo-git $ cd repo-git $ git init $ git p4 sync //path/in/your/perforce/depot ------------ + This imports the specified depot into 'refs/remotes/p4/master' in an existing Git repository. The `--branch` option can be used to specify a different branch to @@ -125,6 +130,7 @@ and merge them with local uncommitted changes. Often, the p4 repository is the ultimate location for all code, thus a rebase workflow makes sense. This command does 'git p4 sync' followed by 'git rebase' to move local commits on top of updated p4 changes. + ------------ $ git p4 rebase ------------ @@ -140,16 +146,19 @@ will be created and populated if it does not already exist. 
To submit all changes that are in the current Git branch but not in the 'p4/master' branch, use: + ------------ $ git p4 submit ------------ To specify a branch other than the current one, use: + ------------ $ git p4 submit topicbranch ------------ To specify a single commit or a range of commits, use: + ------------ $ git p4 submit --commit <sha1> $ git p4 submit --commit <sha1..sha1> @@ -510,20 +519,24 @@ when cloning or syncing to have 'git p4' automatically find subdirectories in p4, and to generate these as branches in Git. For example, if the P4 repository structure is: + ---- //depot/main/... //depot/branch1/... ---- And "p4 branch -o branch1" shows a View line that looks like: + ---- //depot/main/... //depot/branch1/... ---- Then this 'git p4 clone' command: + ---- git p4 clone --detect-branches //depot@all ---- + produces a separate branch in 'refs/remotes/p4/' for //depot/main, called 'master', and one for //depot/branch1 called 'depot/branch1'. @@ -536,6 +549,7 @@ simple p4 branch specification, where the "source" and "destination" are the path elements in the p4 repository. The example above relied on the presence of the p4 branch. Without p4 branches, the same result will occur with: + ---- git init depot cd depot diff --git a/Documentation/git-rebase.adoc b/Documentation/git-rebase.adoc index 153cb69a4f810e..956d3048f5a618 100644 --- a/Documentation/git-rebase.adoc +++ b/Documentation/git-rebase.adoc @@ -1107,10 +1107,12 @@ In that case, the fix is easy because 'git rebase' knows to skip changes that are already present in the new upstream (unless `--reapply-cherry-picks` is given).
So if you say (assuming you're on 'topic') + ------------ $ git rebase subsystem ------------ you will end up with the fixed history + ------------ o---o---o---o---o---o---o---o master \ @@ -1145,6 +1147,7 @@ of the old 'subsystem', for example: You can then transplant the old `subsystem..topic` to the new tip by saying (for the reflog case, and assuming you are on 'topic' already): + ------------ $ git rebase --onto subsystem subsystem@{1} ------------ diff --git a/Documentation/gitattributes.adoc b/Documentation/gitattributes.adoc index a22d1ef1e15438..f20041a323d174 100644 --- a/Documentation/gitattributes.adoc +++ b/Documentation/gitattributes.adoc @@ -531,13 +531,14 @@ must not send any response before it received the content and the final flush packet. Also note that the "value" of a "key=value" pair can contain the "=" character whereas the key would never contain that character. ------------------------- + +----------------------- packet: git> command=smudge packet: git> pathname=path/testfile.dat packet: git> 0000 packet: git> CONTENT packet: git> 0000 ------------------------- +----------------------- The filter is expected to respond with a list of "key=value" pairs terminated with a flush packet. If the filter does not experience @@ -559,6 +560,7 @@ packet: git< 0000 # empty list, keep "status=success" unchanged! If the result content is empty then the filter is expected to respond with a "success" status and a flush packet to signal the empty content. + ------------------------ packet: git< status=success packet: git< 0000 @@ -568,14 +570,16 @@ packet: git< 0000 # empty list, keep "status=success" unchanged! In case the filter cannot or does not want to process the content, it is expected to respond with an "error" status. 
------------------------- + +----------------------- packet: git< status=error packet: git< 0000 ------------------------- +----------------------- If the filter experiences an error during processing, then it can send the status "error" after the content was (partially or completely) sent. + ------------------------ packet: git< status=success packet: git< 0000 @@ -589,10 +593,11 @@ In case the filter cannot or does not want to process the content as well as any future content for the lifetime of the Git process, then it is expected to respond with an "abort" status at any point in the protocol. ------------------------- + +----------------------- packet: git< status=abort packet: git< 0000 ------------------------- +----------------------- Git neither stops nor restarts the filter process in case the "error"/"abort" status is set. However, Git sets its exit code @@ -613,7 +618,8 @@ flag "can-delay" after the filter command and pathname. This flag denotes that the filter can delay filtering the current blob (e.g. to compensate network latencies) by responding with no content but with the status "delayed" and a flush packet. ------------------------- + +----------------------- packet: git> command=smudge packet: git> pathname=path/testfile.dat packet: git> can-delay=1 @@ -622,7 +628,7 @@ packet: git> CONTENT packet: git> 0000 packet: git< status=delayed packet: git< 0000 ------------------------- +----------------------- If the filter supports the "delay" capability then it must support the "list_available_blobs" command. If Git sends this command, then the @@ -647,10 +653,12 @@ packet: git< status=success packet: git< 0000 ------------------------ + After Git received the pathnames, it will request the corresponding blobs again. These requests contain a pathname and an empty content section. The filter is expected to respond with the smudged content in the usual way as explained above. 
+ ------------------------ packet: git> command=smudge packet: git> pathname=path/testfile.dat diff --git a/Documentation/gitcli.adoc b/Documentation/gitcli.adoc index 04193ec907827f..1ea681b59da0aa 100644 --- a/Documentation/gitcli.adoc +++ b/Documentation/gitcli.adoc @@ -209,13 +209,13 @@ $ git foo -o Arg However, this is *NOT* allowed for switches with an optional value, where the 'stuck' form must be used: + ---------------------------- $ git describe --abbrev HEAD # correct $ git describe --abbrev=10 HEAD # correct $ git describe --abbrev 10 HEAD # NOT WHAT YOU MEANT ---------------------------- - NOTES ON FREQUENTLY CONFUSED OPTIONS ------------------------------------ diff --git a/Documentation/gitprotocol-common.adoc b/Documentation/gitprotocol-common.adoc index cdc9d6e707586c..b4a5316ca4bf5e 100644 --- a/Documentation/gitprotocol-common.adoc +++ b/Documentation/gitprotocol-common.adoc @@ -21,11 +21,13 @@ ABNF Notation ABNF notation as described by RFC 5234 is used within the protocol documents, except the following replacement core rules are used: + ---- HEXDIG = DIGIT / "a" / "b" / "c" / "d" / "e" / "f" ---- We also define the following common rules: + ---- NUL = %x00 zero-id = 40*"0" diff --git a/Documentation/gitweb.adoc b/Documentation/gitweb.adoc index 5e2b491ec2256b..4261f9e235db82 100644 --- a/Documentation/gitweb.adoc +++ b/Documentation/gitweb.adoc @@ -103,6 +103,7 @@ You can generate the projects list index file using the project_index action "Generating projects list using gitweb" section below. Example contents: + ----------------------------------------------------------------------- foo.git Joe+R+Hacker+ foo/bar.git O+W+Ner+ @@ -124,6 +125,7 @@ Generating projects list using gitweb We assume that GITWEB_CONFIG has its default Makefile value, namely 'gitweb_config.perl'. 
Put the following in 'gitweb_make_index.perl' file: + ---------------------------------------------------------------------------- read_config_file("gitweb_config.perl"); $projects_list = $projectroot; @@ -518,12 +520,14 @@ rules. If you use the rewrite rules from the example you *might* also need something like the following in your gitweb configuration file (`/etc/gitweb.conf` following example): + ---------------------------------------------------------------------------- @stylesheets = ("/some/absolute/path/gitweb.css"); $my_uri = "/"; $home_link = "/"; $per_request_config = 1; ---------------------------------------------------------------------------- + Nowadays though gitweb should create HTML base tag when needed (to set base URI for relative links), so it should work automatically. @@ -535,6 +539,7 @@ Apache virtual host and gitweb configuration files in the following way. The virtual host configuration (in Apache configuration file) should look like this: + -------------------------------------------------------------------------- ServerName git.example.org @@ -575,9 +580,11 @@ like this: Here actual project root is passed to gitweb via `GITWEB_PROJECT_ROOT` environment variable from a web server, so you need to put the following line in gitweb configuration file (`/etc/gitweb.conf` in above example): + -------------------------------------------------------------------------- $projectroot = $ENV{'GITWEB_PROJECTROOT'} || "/pub/git"; -------------------------------------------------------------------------- + *Note* that this requires to be set for each request, so either `$per_request_config` must be false, or the above must be put in code referenced by `$per_request_config`; @@ -604,9 +611,11 @@ the third and the fourth. 
PATH_INFO usage ~~~~~~~~~~~~~~~ If you enable PATH_INFO usage in gitweb by putting + ---------------------------------------------------------------------------- $feature{'pathinfo'}{'default'} = [1]; ---------------------------------------------------------------------------- + in your gitweb configuration file, it is possible to set up your server so that it consumes and produces URLs in the form @@ -636,6 +645,7 @@ complementary static files (stylesheet, favicon, JavaScript): ---------------------------------------------------------------------------- + The rewrite rule guarantees that existing static files will be properly served, whereas any other URL will be passed to gitweb as PATH_INFO parameter. @@ -647,6 +657,7 @@ for fetching" section). A possible workaround for the latter is the following: in your project root dir (e.g. `/pub/git`) have the projects named *without* a .git extension (e.g. `/pub/git/project` instead of `/pub/git/project.git`) and configure Apache as follows: + ---------------------------------------------------------------------------- ServerAlias git.example.com diff --git a/Documentation/gitweb.conf.adoc b/Documentation/gitweb.conf.adoc index 85983587fcffa8..1348e9b12504db 100644 --- a/Documentation/gitweb.conf.adoc +++ b/Documentation/gitweb.conf.adoc @@ -603,6 +603,7 @@ Many gitweb features can be enabled (or disabled) and configured using the Each `%feature` hash element is a hash reference and has the following structure: + ---------------------------------------------------------------------- "" => { "sub" => , @@ -613,6 +614,7 @@ structure: Some features cannot be overridden per project. 
For those features the structure of appropriate `%feature` hash element has a simpler form: + ---------------------------------------------------------------------- "" => { "override" => 0, diff --git a/Documentation/rev-list-options.adoc b/Documentation/rev-list-options.adoc index 785c0786e0cf2c..9d020e305a7f5c 100644 --- a/Documentation/rev-list-options.adoc +++ b/Documentation/rev-list-options.adoc @@ -429,6 +429,7 @@ filtered for `foo`, they look different and equal, respectively.) In the following, we will always refer to the same example history to illustrate the differences between simplification settings. We assume that you are filtering for a file `foo` in this commit graph: + ----------------------------------------------------------------------- .-A---M---N---O---P---Q / / / / / / @@ -436,6 +437,7 @@ that you are filtering for a file `foo` in this commit graph: \ / / / / / `-------------' X ----------------------------------------------------------------------- + The horizontal line of history A---Q is taken to be the first parent of each merge. 
The commits are: From 09781e379bd72734adf7c3ac10a5cf25b50f803d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:01:23 -0500 Subject: [PATCH 060/775] t5702: fix typo in test name Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5702-protocol-v2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index d3df81e7852d7d..cea8f92a3da065 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -665,7 +665,7 @@ test_expect_success 'even with handcrafted request, filter does not work if not test-tool -C server serve-v2 --stateless-rpc /dev/null ' -test_expect_success 'default refspec is used to filter ref when fetchcing' ' +test_expect_success 'default refspec is used to filter ref when fetching' ' test_when_finished "rm -f log" && GIT_TRACE_PACKET="$(pwd)/log" git -C file_child -c protocol.version=2 \ From 2de68c046e100fa441816d9c9cf30dbe272b6448 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:01:40 -0500 Subject: [PATCH 061/775] t5516: prefer "oid" to "sha1" in some test titles These old tests refer to object ids as "sha1". These days we prefer the more algorithm-agnostic "oid". There are a few more tests that mention sha1 in the title and also use it in variables throughout the test. I've left them for now, as changing them is more involved (and they're linked to the allowTipSHA1InWant config, which as a v0-only thing actually is always sha1). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5516-fetch-push.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 85ed049627d2e6..e7629fc536a54c 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -495,7 +495,7 @@ test_expect_success 'push tag with non-existent, incomplete dest' ' ' -test_expect_success 'push sha1 with non-existent, incomplete dest' ' +test_expect_success 'push oid with non-existent, incomplete dest' ' mk_test testrepo && test_must_fail git push testrepo $(git rev-parse main):foo @@ -1251,7 +1251,7 @@ do ' done -test_expect_success 'fetch exact SHA1' ' +test_expect_success 'fetch exact oid' ' mk_test testrepo heads/main hidden/one && git push testrepo main:refs/hidden/one && ( @@ -1297,7 +1297,7 @@ test_expect_success 'fetch exact SHA1' ' ) ' -test_expect_success 'fetch exact SHA1 in protocol v2' ' +test_expect_success 'fetch exact oid in protocol v2' ' mk_test testrepo heads/main hidden/one && git push testrepo main:refs/hidden/one && git -C testrepo config transfer.hiderefs refs/hidden && From 6ea26f34c95a333d633e2b691805df0c62e6d568 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:02:03 -0500 Subject: [PATCH 062/775] t5516: drop NEEDSWORK about v2 reachability behavior When this test was added in 6c301adb0a (fetch: do not pass ref-prefixes for fetch by exact SHA1, 2018-05-31), there was still some uncertainty about the v2 protocol's looser behavior with serving objects that are not directly pointed at by a ref. At this point that behavior is well established, and I do not think we would ever change v2 to match the v0 behavior (and if we did, remembering to update this test is the least of our concerns). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5516-fetch-push.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index e7629fc536a54c..e4008f3ca62059 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1312,7 +1312,6 @@ test_expect_success 'fetch exact oid in protocol v2' ' test_must_fail git -C child cat-file -t $the_commit && # fetching the hidden object succeeds by default - # NEEDSWORK: should this match the v0 behavior instead? git -C child fetch -v ../testrepo $the_commit:refs/heads/copy ' From 821d8f215769c789becd53830af590176109f8bb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:02:47 -0500 Subject: [PATCH 063/775] t5516: beef up exact-oid ref prefixes test Commit 6c301adb0a (fetch: do not pass ref-prefixes for fetch by exact SHA1, 2018-05-31) added a test that fetching an exact oid with the v2 protocol works. Originally it failed without the code change from that commit, because fetch failed with "no matching remote head". That changed in 0177565148 (transport: do not list refs if possible, 2018-09-27), which made fetch more forgiving of this case. But that now meant the test passes even without its fix! So let's also have it check the packet listing to make sure we did not ask for the bogus prefix (ultimately this is less important than whether the command fails, since it's just an optimization, but we should make sure not to regress it). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5516-fetch-push.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index e4008f3ca62059..2904399e97768e 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1312,7 +1312,10 @@ test_expect_success 'fetch exact oid in protocol v2' ' test_must_fail git -C child cat-file -t $the_commit && # fetching the hidden object succeeds by default - git -C child fetch -v ../testrepo $the_commit:refs/heads/copy + GIT_TRACE_PACKET=$PWD/trace.out \ + git -C child fetch -v ../testrepo $the_commit:refs/heads/copy && + + test_grep ! "ref-prefix.*$the_commit" trace.out ' for configallowtipsha1inwant in true false From 36b12c3248042280b1d41bdba1457f7ac46f2250 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:07:06 -0500 Subject: [PATCH 064/775] refspec_ref_prefixes(): clean up refspec_item logic The point of refspec_ref_prefixes() is to look over the set of refspecs and set up an appropriate list of "ref-prefix" strings to send to the server. The logic for handling individual refspec_items has some confusing bits. The final part of our if/else cascade checks this: else if (item->src && !item->exact_sha1) prefix = item->src; But we know that "item->exact_sha1" can never be true, because earlier we did: if (item->exact_sha1 || item->negative) continue; This is due to 6c301adb0a (fetch: do not pass ref-prefixes for fetch by exact SHA1, 2018-05-31), which added the continue. So it is tempting to remove the extra exact_sha1 at the end of the cascade, leaving the one at the top of the loop. But I don't think that's quite right. The full cascade is: if (rs->fetch == REFSPEC_FETCH) prefix = item->src; else if (item->dst) prefix = item->dst; else if (item->src && !item->exact_sha1) prefix = item->src; which all comes from 6373cb598e (refspec: consolidate ref-prefix generation logic, 2018-05-16). 
That first "if" is supposed to handle fetches, where we care about the source name, since that is coming from the server. And the rest should be for pushes, where we care about the destination, since that's the name the server will use. And we get that either explicitly from "dst" (for something like "foo:bar") or implicitly from the source (a refspec like "foo" is treated as "foo:foo"). But how should exact_sha1 interact with those? For a fetch, exact_sha1 always means we do not care about sending a name to the server (there is no server refname at all). But pushing an exact sha1 should still care about the destination on the server! It is only if we have to fall back to the implicit source that we need to care if it is a real ref (though arguably such a push does not even make sense; where would the server store it?). So I think that 6c301adb0a "broke" the push case by always skipping exact_sha1 items, even though a push should only care about the destination. Of course this is all completely academic. We have still not implemented a v2 push protocol, so even though we do call this function for pushes, we'd never actually send these ref-prefix lines. However, given the effort I spent to figure out what was going on here, and the overlapping exact_sha1 checks, I'd like to rewrite this to preemptively fix the bug, and hopefully make it less confusing. This splits the "if" at the top-level into fetch vs push, and then each handles exact_sha1 appropriately itself. The check for negative refspecs remains outside of either (there is no protocol support for them, so we never send them to the server, but rather use them only to reduce the advertisement we receive). The resulting behavior should be identical for fetches, but hopefully sets us up better for a potential future v2 push. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- refspec.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/refspec.c b/refspec.c index 4cb80b520813d1..c6ad515f041fed 100644 --- a/refspec.c +++ b/refspec.c @@ -246,14 +246,24 @@ void refspec_ref_prefixes(const struct refspec *rs, const struct refspec_item *item = &rs->items[i]; const char *prefix = NULL; - if (item->exact_sha1 || item->negative) + if (item->negative) continue; - if (rs->fetch == REFSPEC_FETCH) - prefix = item->src; - else if (item->dst) - prefix = item->dst; - else if (item->src && !item->exact_sha1) + + if (rs->fetch == REFSPEC_FETCH) { + if (item->exact_sha1) + continue; prefix = item->src; + } else { + /* + * Pushes can have an explicit destination like + * "foo:bar", or can implicitly use the src for both + * ("foo" is the same as "foo:foo"). + */ + if (item->dst) + prefix = item->dst; + else if (item->src && !item->exact_sha1) + prefix = item->src; + } if (!prefix) continue; From 625ed92134acd8a1c8e9b795817b04189bd2a1f7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:08:47 -0500 Subject: [PATCH 065/775] fetch: ask server to advertise HEAD for config-less fetch If we're not given any refspecs (either on the command line or via config) and we have no branch merge config, then we fetch the remote HEAD into our local FETCH_HEAD. In that case we do not send any ref-prefix option to the server at all, and we see the full advertisement. But this is sub-optimal. We only care about HEAD, so we can just ask for that, and ignore all of the other refs. The new test demonstrates a case where we see fewer refs (in this case only one less, but in theory we could be ignoring millions of them). This also removes the only case where we care about seeing some refs from the other side, but don't add anything to the ref_prefixes list. Cleaning this up means one less maintenance burden. 
Before this patch, any code which wanted to add to the list had to make sure the list was not empty, since an empty list meant "ask for everything". Now it really means "we are not interested in any refs". This should let us optimize a few more cases in subsequent patches. Note that we'll add "HEAD" to the list of prefixes, and later code for updating "refs/remotes/<remote>/HEAD" may likewise do so. In theory this could cause duplicates in the list, but in practice these can't both trigger. We hit our new case only if there are no refspecs, and the "<remote>/HEAD" feature is enabled only when we are fetching from a remote with configured refspecs. We could be defensive with a flag, but it didn't seem worth it to me (the absolute worst case is a useless redundant ref-prefix line sent to the server). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 8 ++++++++ t/t5702-protocol-v2.sh | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/builtin/fetch.c b/builtin/fetch.c index 95fd0018b981fb..f14275644166d8 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1766,6 +1766,14 @@ static int do_fetch(struct transport *transport, branch->merge[i]->src); } } + + /* + * If there are no refs specified to fetch, then we just + * fetch HEAD; mention that to narrow the advertisement.
+ */ + if (!transport_ls_refs_options.ref_prefixes.nr) + strvec_push(&transport_ls_refs_options.ref_prefixes, + "HEAD"); } if (tags == TAGS_SET || tags == TAGS_DEFAULT) { diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index cea8f92a3da065..2f0a52a72d1636 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -679,6 +679,21 @@ test_expect_success 'default refspec is used to filter ref when fetching' ' grep "ref-prefix refs/tags/" log ' +test_expect_success 'set up parent for prefix tests' ' + git init prefix-parent && + git -C prefix-parent commit --allow-empty -m foo && + git -C prefix-parent branch unrelated-branch +' + +test_expect_success 'empty refspec filters refs when fetching' ' + git init configless-child && + + test_when_finished "rm -f log" && + GIT_TRACE_PACKET="$(pwd)/log" \ + git -C configless-child fetch ../prefix-parent && + test_grep ! unrelated-branch log +' + test_expect_success 'fetch supports various ways of have lines' ' rm -rf server client trace && git init server && From 095bc13f35b398b481ecd87699fea6b190488c15 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:10:39 -0500 Subject: [PATCH 066/775] fetch: stop protecting additions to ref-prefix list When using the ref-prefix feature of protocol v2, a client which sends no prefixes at all will get the full advertisement. And so the code in git-fetch was historically loose about setting up that list based on our refspecs. There were cases where we needed to know about some refs, so we just didn't add anything to the ref-prefix list. And hence further code, like that for tag-following and updating origin/HEAD, had to be careful about adding to an empty list. E.g., see the bug fixed by bd52d9a058 (fetch: fix following tags when fetching specific OID, 2025-03-07). But the previous commit removed the last such case, and now we know an empty ref-prefix list (at least inside git-fetch's do_fetch() function) means that we really don't need to see any refs. 
So we can drop those extra conditionals. This simplifies the code a little. But it also means that some cases can now use ref prefixes when they would not otherwise. As the test shows, fetching an exact oid into a local ref can now avoid enumerating all of the refs. The refspec itself doesn't need to know about any remote refs, and the tag auto-following can just ask about refs/tags/. The same is true for asking about HEAD to update the local origin/HEAD. I didn't add a test for that yet, though, as we can optimize it even further. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 10 ++++------ t/t5702-protocol-v2.sh | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index f14275644166d8..6ab101fa6dfb45 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1778,16 +1778,14 @@ static int do_fetch(struct transport *transport, if (tags == TAGS_SET || tags == TAGS_DEFAULT) { must_list_refs = 1; - if (transport_ls_refs_options.ref_prefixes.nr) - strvec_push(&transport_ls_refs_options.ref_prefixes, - "refs/tags/"); + strvec_push(&transport_ls_refs_options.ref_prefixes, + "refs/tags/"); } if (uses_remote_tracking(transport, rs)) { must_list_refs = 1; - if (transport_ls_refs_options.ref_prefixes.nr) - strvec_push(&transport_ls_refs_options.ref_prefixes, - "HEAD"); + strvec_push(&transport_ls_refs_options.ref_prefixes, + "HEAD"); } if (must_list_refs) { diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 2f0a52a72d1636..626deb05f046ca 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -682,6 +682,7 @@ test_expect_success 'default refspec is used to filter ref when fetching' ' test_expect_success 'set up parent for prefix tests' ' git init prefix-parent && git -C prefix-parent commit --allow-empty -m foo && + git -C prefix-parent tag my-tag && git -C prefix-parent branch unrelated-branch ' @@ -694,6 +695,19 @@ test_expect_success 'empty refspec 
filters refs when fetching' ' test_grep ! unrelated-branch log ' +test_expect_success 'exact oid fetch with tag following' ' + git init exact-oid-tags && + + commit=$(git -C prefix-parent rev-parse --verify HEAD) && + + test_when_finished "rm -f log" && + GIT_TRACE_PACKET="$(pwd)/log" \ + git -C exact-oid-tags fetch ../prefix-parent \ + $commit:refs/heads/exact && + test_grep ! unrelated-branch log && + git -C exact-oid-tags rev-parse --verify my-tag +' + test_expect_success 'fetch supports various ways of have lines' ' rm -rf server client trace && git init server && From 20010b8c2030867c0e8d55caad7cda2042ac950f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:20:16 -0500 Subject: [PATCH 067/775] fetch: avoid ls-refs only to ask for HEAD symref update When we fetch from a configured remote, we may try to update the local refs/remotes/<remote>/HEAD, and so we ask the server to advertise its HEAD to us. But if we aren't otherwise asking about any refs at all, then we know this HEAD update can never happen! To consider a new value for HEAD, the set_head() function uses guess_remote_head(). And even if it sees an explicit symref value for HEAD, it will only report that as a match if we also saw that remote ref advertised, and it mapped to a local tracking ref via get_fetch_map(). In other words, a fetch like this: git fetch origin $exact_oid:refs/heads/foo can never update HEAD, because we will never have fetched (nor even seen the advertisement for) the ref that HEAD points to. Currently the command above will still call ls-refs to ask about the HEAD, even though it is pointless. This patch teaches it to skip the ls-refs call entirely in this case, which avoids a round-trip to the server.
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 5 ++--- t/t5702-protocol-v2.sh | 13 +++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 6ab101fa6dfb45..c26866e6745ef2 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1782,11 +1782,10 @@ static int do_fetch(struct transport *transport, "refs/tags/"); } - if (uses_remote_tracking(transport, rs)) { - must_list_refs = 1; + if (must_list_refs && + uses_remote_tracking(transport, rs)) strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD"); - } if (must_list_refs) { trace2_region_enter("fetch", "remote_refs", the_repository); diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 626deb05f046ca..4d0cbe98723464 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -708,6 +708,19 @@ test_expect_success 'exact oid fetch with tag following' ' git -C exact-oid-tags rev-parse --verify my-tag ' +test_expect_success 'exact oid fetch avoids pointless HEAD request' ' + git init exact-oid-head && + git -C exact-oid-head remote add origin ../prefix-parent && + + commit=$(git -C prefix-parent rev-parse --verify HEAD) && + + test_when_finished "rm -f log" && + GIT_TRACE_PACKET="$(pwd)/log" \ + git -C exact-oid-head fetch --no-tags origin \ + $commit:refs/heads/exact && + test_grep ! command=ls-refs log +' + test_expect_success 'fetch supports various ways of have lines' ' rm -rf server client trace && git init server && From c702dd48567cfebca3d4a06b691de97da3f8dc4a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 8 Mar 2025 22:21:59 -0500 Subject: [PATCH 068/775] fetch: use ref prefix list to skip ls-refs In git-fetch we have an optimization to avoid issuing an ls-refs command to the server if we don't care about the value of any refs (e.g., because we are fetching exact object ids), saving a round-trip to the server. 
This comes from e70a3030e7 (fetch: do not list refs if fetching only hashes, 2018-09-27). It uses an explicit flag "must_list_refs" to decide when we need to do so. That was needed back then, because the list of ref-prefixes was not always complete. If it was empty, it did not necessarily mean that we were not interested in any refs. But that is no longer the case; an empty list of prefixes means that we truly do not care about any refs. And so rather than an explicit flag, we can just check whether we are interested in any ref prefixes. This simplifies the code slightly, as there is now a single source of truth for the decision. It also fixes a bug in / optimizes a very unlikely case, which is: git fetch $remote ^foo $oid I.e., a negative refspec combined with an exact oid fetch. This is somewhat nonsense, in that there are no positive refspecs mentioning refs to countermand with the negative one. But we should be able to do this without issuing an ls-refs command (excluding "foo" from the empty set will obviously still be the empty set). However, the current code does not do so. The negative refspec is not counted as a noop in un-setting the must_list_refs flag (hardly the fault of e70a3030e7, as negative refspecs did not appear until much later). But by using the prefix list as a source of truth, this naturally just works; the negative refspec does not add a prefix to ask about, and hence does not trigger the ls-refs call. This is esoteric enough that I didn't bother adding a test. The real value here is in the code simplification.
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index c26866e6745ef2..02af50546908e1 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1718,7 +1718,6 @@ static int do_fetch(struct transport *transport, const struct ref *remote_refs; struct transport_ls_refs_options transport_ls_refs_options = TRANSPORT_LS_REFS_OPTIONS_INIT; - int must_list_refs = 1; struct fetch_head fetch_head = { 0 }; struct strbuf err = STRBUF_INIT; @@ -1737,21 +1736,7 @@ static int do_fetch(struct transport *transport, } if (rs->nr) { - int i; - refspec_ref_prefixes(rs, &transport_ls_refs_options.ref_prefixes); - - /* - * We can avoid listing refs if all of them are exact - * OIDs - */ - must_list_refs = 0; - for (i = 0; i < rs->nr; i++) { - if (!rs->items[i].exact_sha1) { - must_list_refs = 1; - break; - } - } } else { struct branch *branch = branch_get(NULL); @@ -1776,18 +1761,20 @@ static int do_fetch(struct transport *transport, "HEAD"); } - if (tags == TAGS_SET || tags == TAGS_DEFAULT) { - must_list_refs = 1; + if (tags == TAGS_SET || tags == TAGS_DEFAULT) strvec_push(&transport_ls_refs_options.ref_prefixes, "refs/tags/"); - } - if (must_list_refs && + if (transport_ls_refs_options.ref_prefixes.nr && uses_remote_tracking(transport, rs)) strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD"); - if (must_list_refs) { + /* + * Only initiate ref listing if we have at least one ref we want to + * know about. 
+ */ + if (transport_ls_refs_options.ref_prefixes.nr) { trace2_region_enter("fetch", "remote_refs", the_repository); remote_refs = transport_get_remote_refs(transport, &transport_ls_refs_options); From 228457c9d9f32f000f5c04c36fcce9002f72965a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:20 +0100 Subject: [PATCH 069/775] csum-file: stop depending on `the_repository` There are multiple sites in "csum-file.c" where we use the global `the_repository` variable, either explicitly or implicitly by using `the_hash_algo`. Refactor the code to stop using `the_repository` by adapting functions to receive required data as parameters. Adapt callsites accordingly by either using `the_repository->hash_algo`, or by using a context-provided hash algorithm in case the subsystem already got rid of its dependency on `the_repository`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 2 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 3 ++- commit-graph.c | 9 ++++++--- csum-file.c | 28 ++++++++++++++++------------ csum-file.h | 12 ++++++++---- midx-write.c | 6 ++++-- midx.c | 3 ++- pack-bitmap-write.c | 2 +- pack-bitmap.c | 9 +++++---- pack-check.c | 2 +- pack-revindex.c | 3 ++- pack-write.c | 12 ++++++------ read-cache.c | 2 +- 14 files changed, 56 insertions(+), 39 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 397a6f46ad85d5..86e6e754816b9a 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -770,7 +770,7 @@ static void start_packfile(void) p->pack_fd = pack_fd; p->do_not_close = 1; p->repo = the_repository; - pack_file = hashfd(pack_fd, p->pack_name); + pack_file = hashfd(the_repository->hash_algo, pack_fd, p->pack_name); pack_data = p; pack_size = write_pack_header(pack_file, 0); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 52cc97d52cb674..3eb5af20950ba0 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1381,7 +1381,7 @@ 
static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha REALLOC_ARRAY(objects, nr_objects + nr_unresolved + 1); memset(objects + nr_objects + 1, 0, nr_unresolved * sizeof(*objects)); - f = hashfd(output_fd, curr_pack); + f = hashfd(the_repository->hash_algo, output_fd, curr_pack); fix_unresolved_deltas(f); strbuf_addf(&msg, Q_("completed with %d local object", "completed with %d local objects", diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 58a9b1612626e0..8e282f2a980dbf 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1311,7 +1311,8 @@ static void write_pack_file(void) char *pack_tmp_name = NULL; if (pack_to_stdout) - f = hashfd_throughput(1, "", progress_state); + f = hashfd_throughput(the_repository->hash_algo, 1, + "", progress_state); else f = create_tmp_packfile(&pack_tmp_name); diff --git a/commit-graph.c b/commit-graph.c index 1021ccb983d4ee..8286d5dda241ff 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2090,11 +2090,13 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return -1; } - f = hashfd(get_tempfile_fd(graph_layer), get_tempfile_path(graph_layer)); + f = hashfd(the_repository->hash_algo, + get_tempfile_fd(graph_layer), get_tempfile_path(graph_layer)); } else { hold_lock_file_for_update_mode(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR, 0444); - f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk)); + f = hashfd(the_repository->hash_algo, + get_lock_file_fd(&lk), get_lock_file_path(&lk)); } cf = init_chunkfile(f); @@ -2716,7 +2718,8 @@ static void graph_report(const char *fmt, ...) 
static int commit_graph_checksum_valid(struct commit_graph *g) { - return hashfile_checksum_valid(g->data, g->data_len); + return hashfile_checksum_valid(the_repository->hash_algo, + g->data, g->data_len); } static int verify_one_commit_graph(struct repository *r, diff --git a/csum-file.c b/csum-file.c index b58c183a4f020a..6e21e3cac8a636 100644 --- a/csum-file.c +++ b/csum-file.c @@ -8,8 +8,6 @@ * able to verify hasn't been messed with afterwards. */ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "csum-file.h" #include "git-zlib.h" @@ -148,21 +146,23 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count) } } -struct hashfile *hashfd_check(const char *name) +struct hashfile *hashfd_check(const struct git_hash_algo *algop, + const char *name) { int sink, check; struct hashfile *f; sink = xopen("/dev/null", O_WRONLY); check = xopen(name, O_RDONLY); - f = hashfd(sink, name); + f = hashfd(algop, sink, name); f->check_fd = check; f->check_buffer = xmalloc(f->buffer_len); return f; } -static struct hashfile *hashfd_internal(int fd, const char *name, +static struct hashfile *hashfd_internal(const struct git_hash_algo *algop, + int fd, const char *name, struct progress *tp, size_t buffer_len) { @@ -176,7 +176,7 @@ static struct hashfile *hashfd_internal(int fd, const char *name, f->do_crc = 0; f->skip_hash = 0; - f->algop = unsafe_hash_algo(the_hash_algo); + f->algop = unsafe_hash_algo(algop); f->algop->init_fn(&f->ctx); f->buffer_len = buffer_len; @@ -186,17 +186,19 @@ static struct hashfile *hashfd_internal(int fd, const char *name, return f; } -struct hashfile *hashfd(int fd, const char *name) +struct hashfile *hashfd(const struct git_hash_algo *algop, + int fd, const char *name) { /* * Since we are not going to use a progress meter to * measure the rate of data passing through this hashfile, * use a larger buffer size to reduce fsync() calls. 
*/ - return hashfd_internal(fd, name, NULL, 128 * 1024); + return hashfd_internal(algop, fd, name, NULL, 128 * 1024); } -struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp) +struct hashfile *hashfd_throughput(const struct git_hash_algo *algop, + int fd, const char *name, struct progress *tp) { /* * Since we are expecting to report progress of the @@ -204,7 +206,7 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp * size so the progress indicators arrive at a more * frequent rate. */ - return hashfd_internal(fd, name, tp, 8 * 1024); + return hashfd_internal(algop, fd, name, tp, 8 * 1024); } void hashfile_checkpoint_init(struct hashfile *f, @@ -246,13 +248,15 @@ uint32_t crc32_end(struct hashfile *f) return f->crc32; } -int hashfile_checksum_valid(const unsigned char *data, size_t total_len) +int hashfile_checksum_valid(const struct git_hash_algo *algop, + const unsigned char *data, size_t total_len) { unsigned char got[GIT_MAX_RAWSZ]; struct git_hash_ctx ctx; - const struct git_hash_algo *algop = unsafe_hash_algo(the_hash_algo); size_t data_len = total_len - algop->rawsz; + algop = unsafe_hash_algo(algop); + if (total_len < algop->rawsz) return 0; /* say "too short"? 
*/ diff --git a/csum-file.h b/csum-file.h index ffccbf09966c08..07ae11024afc34 100644 --- a/csum-file.h +++ b/csum-file.h @@ -45,9 +45,12 @@ int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *); #define CSUM_FSYNC 2 #define CSUM_HASH_IN_STREAM 4 -struct hashfile *hashfd(int fd, const char *name); -struct hashfile *hashfd_check(const char *name); -struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp); +struct hashfile *hashfd(const struct git_hash_algo *algop, + int fd, const char *name); +struct hashfile *hashfd_check(const struct git_hash_algo *algop, + const char *name); +struct hashfile *hashfd_throughput(const struct git_hash_algo *algop, + int fd, const char *name, struct progress *tp); /* * Free the hashfile without flushing its contents to disk. This only @@ -66,7 +69,8 @@ void crc32_begin(struct hashfile *); uint32_t crc32_end(struct hashfile *); /* Verify checksum validity while reading. Returns non-zero on success. */ -int hashfile_checksum_valid(const unsigned char *data, size_t len); +int hashfile_checksum_valid(const struct git_hash_algo *algop, + const unsigned char *data, size_t len); /* * Returns the total number of bytes fed to the hashfile so far (including ones diff --git a/midx-write.c b/midx-write.c index 48d6558253ec93..26d9d8bb148a4e 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1342,10 +1342,12 @@ static int write_midx_internal(struct repository *r, const char *object_dir, return -1; } - f = hashfd(get_tempfile_fd(incr), get_tempfile_path(incr)); + f = hashfd(r->hash_algo, get_tempfile_fd(incr), + get_tempfile_path(incr)); } else { hold_lock_file_for_update(&lk, midx_name.buf, LOCK_DIE_ON_ERROR); - f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk)); + f = hashfd(r->hash_algo, get_lock_file_fd(&lk), + get_lock_file_path(&lk)); } cf = init_chunkfile(f); diff --git a/midx.c b/midx.c index d91088efb87ca0..807fdf72f7b81c 100644 --- a/midx.c +++ b/midx.c @@ -747,7 +747,8 @@ int 
prepare_multi_pack_index_one(struct repository *r, const char *object_dir, i int midx_checksum_valid(struct multi_pack_index *m) { - return hashfile_checksum_valid(m->data, m->data_len); + return hashfile_checksum_valid(m->repo->hash_algo, + m->data, m->data_len); } struct clear_midx_data { diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 34e86d49947d23..50e5c491ccb6af 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -1030,7 +1030,7 @@ void bitmap_writer_finish(struct bitmap_writer *writer, if (writer->pseudo_merges_nr) options |= BITMAP_OPT_PSEUDO_MERGES; - f = hashfd(fd, tmp_file.buf); + f = hashfd(the_repository->hash_algo, fd, tmp_file.buf); memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)); header.version = htons(default_version); diff --git a/pack-bitmap.c b/pack-bitmap.c index 6406953d322371..f0e2c000252fd8 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -3024,7 +3024,8 @@ int bitmap_is_preferred_refname(struct repository *r, const char *refname) return 0; } -static int verify_bitmap_file(const char *name) +static int verify_bitmap_file(const struct git_hash_algo *algop, + const char *name) { struct stat st; unsigned char *data; @@ -3040,7 +3041,7 @@ static int verify_bitmap_file(const char *name) data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - if (!hashfile_checksum_valid(data, st.st_size)) + if (!hashfile_checksum_valid(algop, data, st.st_size)) res = error(_("bitmap file '%s' has invalid checksum"), name); @@ -3055,14 +3056,14 @@ int verify_bitmap_files(struct repository *r) for (struct multi_pack_index *m = get_multi_pack_index(r); m; m = m->next) { char *midx_bitmap_name = midx_bitmap_filename(m); - res |= verify_bitmap_file(midx_bitmap_name); + res |= verify_bitmap_file(r->hash_algo, midx_bitmap_name); free(midx_bitmap_name); } for (struct packed_git *p = get_all_packs(r); p; p = p->next) { char *pack_bitmap_name = pack_bitmap_filename(p); - res |= 
verify_bitmap_file(pack_bitmap_name); + res |= verify_bitmap_file(r->hash_algo, pack_bitmap_name); free(pack_bitmap_name); } diff --git a/pack-check.c b/pack-check.c index d0aeb5ec41259b..6bcadc1e679590 100644 --- a/pack-check.c +++ b/pack-check.c @@ -180,7 +180,7 @@ int verify_pack_index(struct packed_git *p) return error("packfile %s index not opened", p->pack_name); /* Verify SHA1 sum of the index file */ - if (!hashfile_checksum_valid(p->index_data, p->index_size)) + if (!hashfile_checksum_valid(the_repository->hash_algo, p->index_data, p->index_size)) err = error("Packfile index for %s hash mismatch", p->pack_name); return err; diff --git a/pack-revindex.c b/pack-revindex.c index d3832478d99edf..78139e3d7f5c9f 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -322,7 +322,8 @@ int verify_pack_revindex(struct packed_git *p) if (!p->revindex_map || !p->revindex_data) return res; - if (!hashfile_checksum_valid((const unsigned char *)p->revindex_map, p->revindex_size)) { + if (!hashfile_checksum_valid(the_repository->hash_algo, + (const unsigned char *)p->revindex_map, p->revindex_size)) { error(_("invalid checksum")); res = -1; } diff --git a/pack-write.c b/pack-write.c index 823e40b42f2097..5eb89f44cf45b1 100644 --- a/pack-write.c +++ b/pack-write.c @@ -82,7 +82,7 @@ const char *write_idx_file(const struct git_hash_algo *hash_algo, if (opts->flags & WRITE_IDX_VERIFY) { assert(index_name); - f = hashfd_check(index_name); + f = hashfd_check(the_repository->hash_algo, index_name); } else { if (!index_name) { struct strbuf tmp_file = STRBUF_INIT; @@ -92,7 +92,7 @@ const char *write_idx_file(const struct git_hash_algo *hash_algo, unlink(index_name); fd = xopen(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600); } - f = hashfd(fd, index_name); + f = hashfd(the_repository->hash_algo, fd, index_name); } /* if last object's offset is >= 2^31 we should use index V2 */ @@ -268,7 +268,7 @@ char *write_rev_file_order(const struct git_hash_algo *hash_algo, fd = xopen(rev_name, 
O_CREAT|O_EXCL|O_WRONLY, 0600); path = xstrdup(rev_name); } - f = hashfd(fd, path); + f = hashfd(the_repository->hash_algo, fd, path); } else if (flags & WRITE_REV_VERIFY) { struct stat statbuf; if (stat(rev_name, &statbuf)) { @@ -278,7 +278,7 @@ char *write_rev_file_order(const struct git_hash_algo *hash_algo, } else die_errno(_("could not stat: %s"), rev_name); } - f = hashfd_check(rev_name); + f = hashfd_check(the_repository->hash_algo, rev_name); path = xstrdup(rev_name); } else { return NULL; @@ -346,7 +346,7 @@ static char *write_mtimes_file(const struct git_hash_algo *hash_algo, fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX"); mtimes_name = strbuf_detach(&tmp_file, NULL); - f = hashfd(fd, mtimes_name); + f = hashfd(the_repository->hash_algo, fd, mtimes_name); write_mtimes_header(hash_algo, f); write_mtimes_objects(f, to_pack, objects, nr_objects); @@ -534,7 +534,7 @@ struct hashfile *create_tmp_packfile(char **pack_tmp_name) fd = odb_mkstemp(&tmpname, "pack/tmp_pack_XXXXXX"); *pack_tmp_name = strbuf_detach(&tmpname, NULL); - return hashfd(fd, *pack_tmp_name); + return hashfd(the_repository->hash_algo, fd, *pack_tmp_name); } static void rename_tmp_packfile(struct strbuf *name_prefix, const char *source, diff --git a/read-cache.c b/read-cache.c index e678c13e8f15e2..a012cb22368895 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2848,7 +2848,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, struct strbuf sb = STRBUF_INIT; int nr, nr_threads, ret; - f = hashfd(tempfile->fd, tempfile->filename.buf); + f = hashfd(the_repository->hash_algo, tempfile->fd, tempfile->filename.buf); prepare_repo_settings(r); f->skip_hash = r->settings.index_skip_hash; From 74d414c9f14a91a3b7bd04972bf3eb9bbe6fd81b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:21 +0100 Subject: [PATCH 070/775] object: stop depending on `the_repository` There are a couple of functions exposed by "object.c" that implicitly depend 
on `the_repository`. Remove this dependency by injecting the repository via a parameter. Adapt callers accordingly by simply using `the_repository`, except in cases where the subsystem is already free of the repository. In that case, we instead pass the repository provided by the caller's context. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fsck.c | 4 ++-- builtin/grep.c | 2 +- builtin/index-pack.c | 4 ++-- builtin/log.c | 2 +- builtin/name-rev.c | 4 ++-- builtin/pack-objects.c | 2 +- builtin/prune.c | 2 +- midx-write.c | 4 ++-- object.c | 21 +++++++++++---------- object.h | 10 ++++++---- pack-bitmap.c | 6 +++--- reachable.c | 6 +++--- revision.c | 3 ++- shallow.c | 10 +++++----- upload-pack.c | 12 ++++++------ 15 files changed, 48 insertions(+), 44 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index eea1d43647f224..62c7494bbda492 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -399,12 +399,12 @@ static void check_connectivity(void) } /* Look up all the requirements, warn about missing objects.. 
*/ - max = get_max_object_index(); + max = get_max_object_index(the_repository); if (verbose) fprintf_ln(stderr, _("Checking connectivity (%d objects)"), max); for (i = 0; i < max; i++) { - struct object *obj = get_indexed_object(i); + struct object *obj = get_indexed_object(the_repository, i); if (obj) check_object(obj); diff --git a/builtin/grep.c b/builtin/grep.c index d1427290f773b6..cbbf6f264390bf 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1144,7 +1144,7 @@ int cmd_grep(int argc, break; } - object = parse_object_or_die(&oid, arg); + object = parse_object_or_die(the_repository, &oid, arg); if (!seen_dashdash) verify_non_filename(prefix, arg); add_object_array_with_path(object, arg, &list, oc.mode, oc.path); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 3eb5af20950ba0..1268032d7692e5 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -279,14 +279,14 @@ static unsigned check_objects(void) { unsigned i, max, foreign_nr = 0; - max = get_max_object_index(); + max = get_max_object_index(the_repository); if (verbose) progress = start_delayed_progress(the_repository, _("Checking objects"), max); for (i = 0; i < max; i++) { - foreign_nr += check_object(get_indexed_object(i)); + foreign_nr += check_object(get_indexed_object(the_repository, i)); display_progress(progress, i + 1); } diff --git a/builtin/log.c b/builtin/log.c index 04a6ef97bc1442..0d4c579dad761f 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -2468,7 +2468,7 @@ int cmd_format_patch(int argc, base = get_base_commit(&cfg, list, nr); if (base) { reset_revision_walk(); - clear_object_flags(UNINTERESTING); + clear_object_flags(the_repository, UNINTERESTING); prepare_bases(&bases, base, list, nr); } diff --git a/builtin/name-rev.c b/builtin/name-rev.c index beac166b5cb414..2fe82c839ba92c 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -667,9 +667,9 @@ int cmd_name_rev(int argc, } else if (all) { int i, max; - max = get_max_object_index(); + max = 
get_max_object_index(the_repository); for (i = 0; i < max; i++) { - struct object *obj = get_indexed_object(i); + struct object *obj = get_indexed_object(the_repository, i); if (!obj || obj->type != OBJ_COMMIT) continue; show_name(obj, NULL, diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 8e282f2a980dbf..c3adbc94504d8b 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4161,7 +4161,7 @@ static int mark_bitmap_preferred_tip(const char *refname, if (!peel_iterated_oid(the_repository, oid, &peeled)) oid = &peeled; - object = parse_object_or_die(oid, refname); + object = parse_object_or_die(the_repository, oid, refname); if (object->type == OBJ_COMMIT) object->flags |= NEEDS_BITMAP; diff --git a/builtin/prune.c b/builtin/prune.c index 1c357fffd8cde6..8f52da8bd6608a 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -185,7 +185,7 @@ int cmd_prune(int argc, const char *name = *argv++; if (!repo_get_oid(the_repository, name, &oid)) { - struct object *object = parse_object_or_die(&oid, + struct object *object = parse_object_or_die(the_repository, &oid, name); add_pending_object(&revs, object, ""); } diff --git a/midx-write.c b/midx-write.c index 26d9d8bb148a4e..ac80a8298edfb6 100644 --- a/midx-write.c +++ b/midx-write.c @@ -708,7 +708,7 @@ static int add_ref_to_pending(const char *refname, const char *referent UNUSED, if (!peel_iterated_oid(revs->repo, oid, &peeled)) oid = &peeled; - object = parse_object_or_die(oid, refname); + object = parse_object_or_die(revs->repo, oid, refname); if (object->type != OBJ_COMMIT) return 0; @@ -768,7 +768,7 @@ static int read_refs_snapshot(const char *refs_snapshot, if (*end) die(_("malformed line: %s"), buf.buf); - object = parse_object_or_die(&oid, NULL); + object = parse_object_or_die(revs->repo, &oid, NULL); if (preferred) object->flags |= NEEDS_BITMAP; diff --git a/object.c b/object.c index 100bf9b8d12beb..154525a497234c 100644 --- a/object.c +++ b/object.c @@ -1,4 +1,3 @@ -#define 
USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -18,14 +17,15 @@ #include "commit-graph.h" #include "loose.h" -unsigned int get_max_object_index(void) +unsigned int get_max_object_index(const struct repository *repo) { - return the_repository->parsed_objects->obj_hash_size; + return repo->parsed_objects->obj_hash_size; } -struct object *get_indexed_object(unsigned int idx) +struct object *get_indexed_object(const struct repository *repo, + unsigned int idx) { - return the_repository->parsed_objects->obj_hash[idx]; + return repo->parsed_objects->obj_hash[idx]; } static const char *object_type_strings[] = { @@ -283,10 +283,11 @@ struct object *parse_object_buffer(struct repository *r, const struct object_id return obj; } -struct object *parse_object_or_die(const struct object_id *oid, +struct object *parse_object_or_die(struct repository *repo, + const struct object_id *oid, const char *name) { - struct object *o = parse_object(the_repository, oid); + struct object *o = parse_object(repo, oid); if (o) return o; @@ -524,12 +525,12 @@ void object_array_remove_duplicates(struct object_array *array) } } -void clear_object_flags(unsigned flags) +void clear_object_flags(struct repository *repo, unsigned flags) { int i; - for (i=0; i < the_repository->parsed_objects->obj_hash_size; i++) { - struct object *obj = the_repository->parsed_objects->obj_hash[i]; + for (i=0; i < repo->parsed_objects->obj_hash_size; i++) { + struct object *obj = repo->parsed_objects->obj_hash[i]; if (obj) obj->flags &= ~flags; } diff --git a/object.h b/object.h index 17f32f1103e58a..a30409397991b9 100644 --- a/object.h +++ b/object.h @@ -169,12 +169,13 @@ int type_from_string_gently(const char *str, ssize_t, int gentle); /* * Return the current number of buckets in the object hashmap. 
*/ -unsigned int get_max_object_index(void); +unsigned int get_max_object_index(const struct repository *repo); /* * Return the object from the specified bucket in the object hashmap. */ -struct object *get_indexed_object(unsigned int); +struct object *get_indexed_object(const struct repository *repo, + unsigned int); /* * This can be used to see if we have heard of the object before, but @@ -231,7 +232,8 @@ struct object *parse_object_with_flags(struct repository *r, * "name" parameter is not NULL, it is included in the error message * (otherwise, the hex object ID is given). */ -struct object *parse_object_or_die(const struct object_id *oid, const char *name); +struct object *parse_object_or_die(struct repository *repo, const struct object_id *oid, + const char *name); /* Given the result of read_sha1_file(), returns the object after * parsing it. eaten_p indicates if the object has a borrowed copy @@ -336,7 +338,7 @@ void object_array_remove_duplicates(struct object_array *array); */ void object_array_clear(struct object_array *array); -void clear_object_flags(unsigned flags); +void clear_object_flags(struct repository *repo, unsigned flags); /* * Clear the specified object flags from all in-core commit objects from diff --git a/pack-bitmap.c b/pack-bitmap.c index f0e2c000252fd8..7fd78c634ef142 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1301,7 +1301,7 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, revs->tag_objects = tmp_tags; reset_revision_walk(); - clear_object_flags(UNINTERESTING); + clear_object_flags(repo, UNINTERESTING); /* * Then add the boundary commit(s) as fill-in traversal tips. 
@@ -1935,7 +1935,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, struct object *object = revs->pending.objects[i].item; if (object->type == OBJ_NONE) - parse_object_or_die(&object->oid, NULL); + parse_object_or_die(revs->repo, &object->oid, NULL); while (object->type == OBJ_TAG) { struct tag *tag = (struct tag *) object; @@ -1945,7 +1945,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, else object_list_insert(object, &wants); - object = parse_object_or_die(get_tagged_oid(tag), NULL); + object = parse_object_or_die(revs->repo, get_tagged_oid(tag), NULL); object->flags |= (tag->object.flags & UNINTERESTING); } diff --git a/reachable.c b/reachable.c index 9ee04c89ec6151..1b26b9b1d76d8c 100644 --- a/reachable.c +++ b/reachable.c @@ -45,7 +45,7 @@ static void add_one_file(const char *path, struct rev_info *revs) } strbuf_trim(&buf); if (!get_oid_hex(buf.buf, &oid)) { - object = parse_object_or_die(&oid, buf.buf); + object = parse_object_or_die(the_repository, &oid, buf.buf); add_pending_object(revs, object, ""); } strbuf_release(&buf); @@ -94,7 +94,7 @@ static int add_one_ref(const char *path, const char *referent UNUSED, const stru return 0; } - object = parse_object_or_die(oid, path); + object = parse_object_or_die(the_repository, oid, path); add_pending_object(revs, object, ""); return 0; @@ -218,7 +218,7 @@ static void add_recent_object(const struct object_id *oid, switch (type) { case OBJ_TAG: case OBJ_COMMIT: - obj = parse_object_or_die(oid, NULL); + obj = parse_object_or_die(the_repository, oid, NULL); break; case OBJ_TREE: obj = (struct object *)lookup_tree(the_repository, oid); diff --git a/revision.c b/revision.c index c4390f0938cbde..b536c4a29ad1d7 100644 --- a/revision.c +++ b/revision.c @@ -3612,7 +3612,8 @@ static void set_children(struct rev_info *revs) void reset_revision_walk(void) { - clear_object_flags(SEEN | ADDED | SHOWN | TOPO_WALK_EXPLORED | TOPO_WALK_INDEGREE); + clear_object_flags(the_repository, + SEEN | 
ADDED | SHOWN | TOPO_WALK_EXPLORED | TOPO_WALK_INDEGREE); } static int mark_uninteresting(const struct object_id *oid, diff --git a/shallow.c b/shallow.c index 4bd9342c9a745a..06c3266a3e0b14 100644 --- a/shallow.c +++ b/shallow.c @@ -226,7 +226,7 @@ struct commit_list *get_shallow_commits_by_rev_list(int ac, const char **av, * SHALLOW (excluded) and NOT_SHALLOW (included) should not be * set at this point. But better be safe than sorry. */ - clear_object_flags(both_flags); + clear_object_flags(the_repository, both_flags); is_repository_shallow(the_repository); /* make sure shallows are read */ @@ -613,9 +613,9 @@ static void paint_down(struct paint_info *info, const struct object_id *oid, } } - nr = get_max_object_index(); + nr = get_max_object_index(the_repository); for (i = 0; i < nr; i++) { - struct object *o = get_indexed_object(i); + struct object *o = get_indexed_object(the_repository, i); if (o && o->type == OBJ_COMMIT) o->flags &= ~SEEN; } @@ -675,9 +675,9 @@ void assign_shallow_commits_to_refs(struct shallow_info *info, * Prepare the commit graph to track what refs can reach what * (new) shallow commits. 
*/ - nr = get_max_object_index(); + nr = get_max_object_index(the_repository); for (i = 0; i < nr; i++) { - struct object *o = get_indexed_object(i); + struct object *o = get_indexed_object(the_repository, i); if (!o || o->type != OBJ_COMMIT) continue; diff --git a/upload-pack.c b/upload-pack.c index 7498b45e2e1e21..67063708b825be 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -665,8 +665,8 @@ static int do_reachable_revlist(struct child_process *cmd, cmd_in = xfdopen(cmd->in, "w"); - for (i = get_max_object_index(); 0 < i; ) { - o = get_indexed_object(--i); + for (i = get_max_object_index(the_repository); 0 < i; ) { + o = get_indexed_object(the_repository, --i); if (!o) continue; if (reachable && o->type == OBJ_COMMIT) @@ -734,8 +734,8 @@ static int get_reachable_list(struct upload_pack_data *data, o->flags &= ~TMP_MARK; } } - for (i = get_max_object_index(); 0 < i; i--) { - o = get_indexed_object(i - 1); + for (i = get_max_object_index(the_repository); 0 < i; i--) { + o = get_indexed_object(the_repository, i - 1); if (o && o->type == OBJ_COMMIT && (o->flags & TMP_MARK)) { add_object_array(o, NULL, reachable); @@ -1557,7 +1557,7 @@ static int parse_want_ref(struct packet_writer *writer, const char *line, } if (!o) - o = parse_object_or_die(&oid, refname_nons); + o = parse_object_or_die(the_repository, &oid, refname_nons); if (!(o->flags & WANTED)) { o->flags |= WANTED; @@ -1793,7 +1793,7 @@ int upload_pack_v2(struct repository *r, struct packet_reader *request) enum fetch_state state = FETCH_PROCESS_ARGS; struct upload_pack_data data; - clear_object_flags(ALL_FLAGS); + clear_object_flags(the_repository, ALL_FLAGS); upload_pack_data_init(&data); data.use_sideband = LARGE_PACKET_MAX; From 2582846f2fe21b23fe7c567e030510960f135160 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:22 +0100 Subject: [PATCH 071/775] pack-write: stop depending on `the_repository` and `the_hash_algo` There are a couple of functions in "pack-write.c" that 
implicitly depend on `the_repository` or `the_hash_algo`. Remove this dependency by injecting the repository via a parameter and adapt callers accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 2 +- builtin/index-pack.c | 4 +-- builtin/pack-objects.c | 4 +-- bulk-checkin.c | 4 +-- midx-write.c | 2 +- pack-write.c | 55 +++++++++++++++++++++--------------------- pack.h | 11 +++++---- 7 files changed, 41 insertions(+), 41 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 86e6e754816b9a..e1758aa95140ed 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -798,7 +798,7 @@ static const char *create_index(void) if (c != last) die("internal consistency error creating the index"); - tmpfile = write_idx_file(the_hash_algo, NULL, idx, object_count, + tmpfile = write_idx_file(the_repository, NULL, idx, object_count, &pack_idx_opts, pack_data->hash); free(idx); return tmpfile; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 1268032d7692e5..174e03afa61272 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -2088,10 +2088,10 @@ int cmd_index_pack(int argc, ALLOC_ARRAY(idx_objects, nr_objects); for (i = 0; i < nr_objects; i++) idx_objects[i] = &objects[i].idx; - curr_index = write_idx_file(the_hash_algo, index_name, idx_objects, + curr_index = write_idx_file(the_repository, index_name, idx_objects, nr_objects, &opts, pack_hash); if (rev_index) - curr_rev_index = write_rev_file(the_hash_algo, rev_index_name, + curr_rev_index = write_rev_file(the_repository, rev_index_name, idx_objects, nr_objects, pack_hash, opts.flags); free(idx_objects); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c3adbc94504d8b..e05dfc4021e212 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1314,7 +1314,7 @@ static void write_pack_file(void) f = hashfd_throughput(the_repository->hash_algo, 1, "", progress_state); else - f = 
create_tmp_packfile(&pack_tmp_name); + f = create_tmp_packfile(the_repository, &pack_tmp_name); offset = write_pack_header(f, nr_remaining); @@ -1407,7 +1407,7 @@ static void write_pack_file(void) if (cruft) pack_idx_opts.flags |= WRITE_MTIMES; - stage_tmp_packfiles(the_hash_algo, &tmpname, + stage_tmp_packfiles(the_repository, &tmpname, pack_tmp_name, written_list, nr_written, &to_pack, &pack_idx_opts, hash, diff --git a/bulk-checkin.c b/bulk-checkin.c index 20f2da67b93faa..23ac00ea0a6b41 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -44,7 +44,7 @@ static void finish_tmp_packfile(struct strbuf *basename, { char *idx_tmp_name = NULL; - stage_tmp_packfiles(the_hash_algo, basename, pack_tmp_name, + stage_tmp_packfiles(the_repository, basename, pack_tmp_name, written_list, nr_written, NULL, pack_idx_opts, hash, &idx_tmp_name); rename_tmp_packfile_idx(basename, &idx_tmp_name); @@ -244,7 +244,7 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state, if (!(flags & HASH_WRITE_OBJECT) || state->f) return; - state->f = create_tmp_packfile(&state->pack_tmp_name); + state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name); reset_pack_idx_option(&state->pack_idx_opts); /* Pretend we are going to write only one object */ diff --git a/midx-write.c b/midx-write.c index ac80a8298edfb6..a628ac24dcb428 100644 --- a/midx-write.c +++ b/midx-write.c @@ -658,7 +658,7 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash, strbuf_addf(&buf, "%s-%s.rev", midx_name, hash_to_hex_algop(midx_hash, ctx->repo->hash_algo)); - tmp_file = write_rev_file_order(ctx->repo->hash_algo, NULL, ctx->pack_order, + tmp_file = write_rev_file_order(ctx->repo, NULL, ctx->pack_order, ctx->entries_nr, midx_hash, WRITE_REV); if (finalize_object_file(tmp_file, buf.buf)) diff --git a/pack-write.c b/pack-write.c index 5eb89f44cf45b1..6b06315f80ab48 100644 --- a/pack-write.c +++ b/pack-write.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include 
"git-compat-util.h" #include "environment.h" #include "gettext.h" @@ -56,7 +54,7 @@ static int need_large_offset(off_t offset, const struct pack_idx_option *opts) * The *sha1 contains the pack content SHA1 hash. * The objects array passed in will be sorted by SHA1 on exit. */ -const char *write_idx_file(const struct git_hash_algo *hash_algo, +const char *write_idx_file(struct repository *repo, const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *opts, const unsigned char *sha1) @@ -82,7 +80,7 @@ const char *write_idx_file(const struct git_hash_algo *hash_algo, if (opts->flags & WRITE_IDX_VERIFY) { assert(index_name); - f = hashfd_check(the_repository->hash_algo, index_name); + f = hashfd_check(repo->hash_algo, index_name); } else { if (!index_name) { struct strbuf tmp_file = STRBUF_INIT; @@ -92,7 +90,7 @@ const char *write_idx_file(const struct git_hash_algo *hash_algo, unlink(index_name); fd = xopen(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600); } - f = hashfd(the_repository->hash_algo, fd, index_name); + f = hashfd(repo->hash_algo, fd, index_name); } /* if last object's offset is >= 2^31 we should use index V2 */ @@ -131,7 +129,7 @@ const char *write_idx_file(const struct git_hash_algo *hash_algo, struct pack_idx_entry *obj = *list++; if (index_version < 2) hashwrite_be32(f, obj->offset); - hashwrite(f, obj->oid.hash, hash_algo->rawsz); + hashwrite(f, obj->oid.hash, repo->hash_algo->rawsz); if ((opts->flags & WRITE_IDX_STRICT) && (i && oideq(&list[-2]->oid, &obj->oid))) die("The same object %s appears twice in the pack", @@ -173,7 +171,7 @@ const char *write_idx_file(const struct git_hash_algo *hash_algo, } } - hashwrite(f, sha1, hash_algo->rawsz); + hashwrite(f, sha1, repo->hash_algo->rawsz); finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA, CSUM_HASH_IN_STREAM | CSUM_CLOSE | ((opts->flags & WRITE_IDX_VERIFY) ? 
0 : CSUM_FSYNC)); @@ -217,7 +215,7 @@ static void write_rev_trailer(const struct git_hash_algo *hash_algo, hashwrite(f, hash, hash_algo->rawsz); } -char *write_rev_file(const struct git_hash_algo *hash_algo, +char *write_rev_file(struct repository *repo, const char *rev_name, struct pack_idx_entry **objects, uint32_t nr_objects, @@ -236,7 +234,7 @@ char *write_rev_file(const struct git_hash_algo *hash_algo, pack_order[i] = i; QSORT_S(pack_order, nr_objects, pack_order_cmp, objects); - ret = write_rev_file_order(hash_algo, rev_name, pack_order, nr_objects, + ret = write_rev_file_order(repo, rev_name, pack_order, nr_objects, hash, flags); free(pack_order); @@ -244,7 +242,7 @@ char *write_rev_file(const struct git_hash_algo *hash_algo, return ret; } -char *write_rev_file_order(const struct git_hash_algo *hash_algo, +char *write_rev_file_order(struct repository *repo, const char *rev_name, uint32_t *pack_order, uint32_t nr_objects, @@ -268,7 +266,7 @@ char *write_rev_file_order(const struct git_hash_algo *hash_algo, fd = xopen(rev_name, O_CREAT|O_EXCL|O_WRONLY, 0600); path = xstrdup(rev_name); } - f = hashfd(the_repository->hash_algo, fd, path); + f = hashfd(repo->hash_algo, fd, path); } else if (flags & WRITE_REV_VERIFY) { struct stat statbuf; if (stat(rev_name, &statbuf)) { @@ -278,18 +276,18 @@ char *write_rev_file_order(const struct git_hash_algo *hash_algo, } else die_errno(_("could not stat: %s"), rev_name); } - f = hashfd_check(the_repository->hash_algo, rev_name); + f = hashfd_check(repo->hash_algo, rev_name); path = xstrdup(rev_name); } else { return NULL; } - write_rev_header(hash_algo, f); + write_rev_header(repo->hash_algo, f); write_rev_index_positions(f, pack_order, nr_objects); - write_rev_trailer(hash_algo, f, hash); + write_rev_trailer(repo->hash_algo, f, hash); - if (adjust_shared_perm(the_repository, path) < 0) + if (adjust_shared_perm(repo, path) < 0) die(_("failed to make %s readable"), path); finalize_hashfile(f, NULL, 
FSYNC_COMPONENT_PACK_METADATA, @@ -330,7 +328,7 @@ static void write_mtimes_trailer(const struct git_hash_algo *hash_algo, hashwrite(f, hash, hash_algo->rawsz); } -static char *write_mtimes_file(const struct git_hash_algo *hash_algo, +static char *write_mtimes_file(struct repository *repo, struct packing_data *to_pack, struct pack_idx_entry **objects, uint32_t nr_objects, @@ -346,13 +344,13 @@ static char *write_mtimes_file(const struct git_hash_algo *hash_algo, fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX"); mtimes_name = strbuf_detach(&tmp_file, NULL); - f = hashfd(the_repository->hash_algo, fd, mtimes_name); + f = hashfd(repo->hash_algo, fd, mtimes_name); - write_mtimes_header(hash_algo, f); + write_mtimes_header(repo->hash_algo, f); write_mtimes_objects(f, to_pack, objects, nr_objects); - write_mtimes_trailer(hash_algo, f, hash); + write_mtimes_trailer(repo->hash_algo, f, hash); - if (adjust_shared_perm(the_repository, mtimes_name) < 0) + if (adjust_shared_perm(repo, mtimes_name) < 0) die(_("failed to make %s readable"), mtimes_name); finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA, @@ -527,14 +525,15 @@ int encode_in_pack_object_header(unsigned char *hdr, int hdr_len, return n; } -struct hashfile *create_tmp_packfile(char **pack_tmp_name) +struct hashfile *create_tmp_packfile(struct repository *repo, + char **pack_tmp_name) { struct strbuf tmpname = STRBUF_INIT; int fd; fd = odb_mkstemp(&tmpname, "pack/tmp_pack_XXXXXX"); *pack_tmp_name = strbuf_detach(&tmpname, NULL); - return hashfd(the_repository->hash_algo, fd, *pack_tmp_name); + return hashfd(repo->hash_algo, fd, *pack_tmp_name); } static void rename_tmp_packfile(struct strbuf *name_prefix, const char *source, @@ -555,7 +554,7 @@ void rename_tmp_packfile_idx(struct strbuf *name_buffer, rename_tmp_packfile(name_buffer, *idx_tmp_name, "idx"); } -void stage_tmp_packfiles(const struct git_hash_algo *hash_algo, +void stage_tmp_packfiles(struct repository *repo, struct strbuf *name_buffer, const 
char *pack_tmp_name, struct pack_idx_entry **written_list, @@ -568,19 +567,19 @@ void stage_tmp_packfiles(const struct git_hash_algo *hash_algo, char *rev_tmp_name = NULL; char *mtimes_tmp_name = NULL; - if (adjust_shared_perm(the_repository, pack_tmp_name)) + if (adjust_shared_perm(repo, pack_tmp_name)) die_errno("unable to make temporary pack file readable"); - *idx_tmp_name = (char *)write_idx_file(hash_algo, NULL, written_list, + *idx_tmp_name = (char *)write_idx_file(repo, NULL, written_list, nr_written, pack_idx_opts, hash); - if (adjust_shared_perm(the_repository, *idx_tmp_name)) + if (adjust_shared_perm(repo, *idx_tmp_name)) die_errno("unable to make temporary index file readable"); - rev_tmp_name = write_rev_file(hash_algo, NULL, written_list, nr_written, + rev_tmp_name = write_rev_file(repo, NULL, written_list, nr_written, hash, pack_idx_opts->flags); if (pack_idx_opts->flags & WRITE_MTIMES) { - mtimes_tmp_name = write_mtimes_file(hash_algo, to_pack, + mtimes_tmp_name = write_mtimes_file(repo, to_pack, written_list, nr_written, hash); } diff --git a/pack.h b/pack.h index 9f1194ac13de17..5d4393eaffef04 100644 --- a/pack.h +++ b/pack.h @@ -87,7 +87,7 @@ struct progress; /* Note, the data argument could be NULL if object type is blob */ typedef int (*verify_fn)(const struct object_id *, enum object_type, unsigned long, void*, int*); -const char *write_idx_file(const struct git_hash_algo *hash_algo, +const char *write_idx_file(struct repository *repo, const char *index_name, struct pack_idx_entry **objects, int nr_objects, @@ -106,13 +106,13 @@ struct ref; void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought); -char *write_rev_file(const struct git_hash_algo *hash_algo, +char *write_rev_file(struct repository *repo, const char *rev_name, struct pack_idx_entry **objects, uint32_t nr_objects, const unsigned char *hash, unsigned flags); -char *write_rev_file_order(const struct git_hash_algo *hash_algo, +char 
*write_rev_file_order(struct repository *repo, const char *rev_name, uint32_t *pack_order, uint32_t nr_objects, @@ -134,8 +134,9 @@ int read_pack_header(int fd, struct pack_header *); struct packing_data; -struct hashfile *create_tmp_packfile(char **pack_tmp_name); -void stage_tmp_packfiles(const struct git_hash_algo *hash_algo, +struct hashfile *create_tmp_packfile(struct repository *repo, + char **pack_tmp_name); +void stage_tmp_packfiles(struct repository *repo, struct strbuf *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, From 7835ee75cdffbce925246cbacc83e8b4a932a681 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:23 +0100 Subject: [PATCH 072/775] environment: move access to "core.bigFileThreshold" into repo settings The "core.bigFileThreshold" setting is stored in a global variable and populated via `git_default_core_config()`. This may cause issues in the case where one is handling multiple different repositories in a single process with different values for that config key, as we may or may not see the correct value in that case. Furthermore, global state blocks our path towards libification. Refactor the code so that we instead store the value in `struct repo_settings`, where the value is computed as-needed and cached. Note that this change requires us to adapt one test in t1050 that verifies that we die when parsing an invalid "core.bigFileThreshold" value. The exercised Git command doesn't use the value at all, and thus it won't hit the new code path that parses the value. This is addressed by using git-hash-object(1) instead, which does read the value. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive.c | 2 +- builtin/fast-import.c | 4 ++-- builtin/index-pack.c | 6 ++++-- builtin/pack-objects.c | 6 ++++-- builtin/unpack-objects.c | 3 ++- config.c | 5 ----- diff.c | 6 ++++-- environment.c | 1 - environment.h | 1 - object-file.c | 6 ++++-- pack-check.c | 3 ++- repo-settings.c | 20 ++++++++++++++++++++ repo-settings.h | 5 +++++ streaming.c | 3 ++- t/t1050-large.sh | 3 ++- 15 files changed, 52 insertions(+), 22 deletions(-) diff --git a/archive.c b/archive.c index 8be4e7ac8db50c..f4331aeb49e7b3 100644 --- a/archive.c +++ b/archive.c @@ -216,7 +216,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base, /* Stream it? */ if (S_ISREG(mode) && !args->convert && oid_object_info(args->repo, oid, &size) == OBJ_BLOB && - size > big_file_threshold) + size > repo_settings_get_big_file_threshold(the_repository)) return write_entry(args, oid, path.buf, path.len, mode, NULL, size); buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size); diff --git a/builtin/fast-import.c b/builtin/fast-import.c index e1758aa95140ed..ce52f78579d818 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2021,7 +2021,7 @@ static void parse_and_store_blob( static struct strbuf buf = STRBUF_INIT; uintmax_t len; - if (parse_data(&buf, big_file_threshold, &len)) + if (parse_data(&buf, repo_settings_get_big_file_threshold(the_repository), &len)) store_object(OBJ_BLOB, &buf, last, oidout, mark); else { if (last) { @@ -3402,7 +3402,7 @@ static int parse_one_option(const char *option) unsigned long v; if (!git_parse_ulong(option, &v)) return 0; - big_file_threshold = v; + repo_settings_set_big_file_threshold(the_repository, v); } else if (skip_prefix(option, "depth=", &option)) { option_depth(option); } else if (skip_prefix(option, "active-branches=", &option)) { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 174e03afa61272..09c0a8adf63791 100644 --- 
a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -485,7 +485,8 @@ static void *unpack_entry_data(off_t offset, unsigned long size, git_hash_update(&c, hdr, hdrlen); } else oid = NULL; - if (type == OBJ_BLOB && size > big_file_threshold) + if (type == OBJ_BLOB && + size > repo_settings_get_big_file_threshold(the_repository)) buf = fixed_buf; else buf = xmallocz(size); @@ -799,7 +800,8 @@ static int check_collison(struct object_entry *entry) enum object_type type; unsigned long size; - if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB) + if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || + entry->type != OBJ_BLOB) return -1; memset(&data, 0, sizeof(data)); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index e05dfc4021e212..eefc6c7809607f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -499,7 +499,8 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent if (!usable_delta) { if (oe_type(entry) == OBJ_BLOB && - oe_size_greater_than(&to_pack, entry, big_file_threshold) && + oe_size_greater_than(&to_pack, entry, + repo_settings_get_big_file_threshold(the_repository)) && (st = open_istream(the_repository, &entry->idx.oid, &type, &size, NULL)) != NULL) buf = NULL; @@ -2454,7 +2455,8 @@ static void get_object_details(void) struct object_entry *entry = sorted_by_offset[i]; check_object(entry, i); if (entry->type_valid && - oe_size_greater_than(&to_pack, entry, big_file_threshold)) + oe_size_greater_than(&to_pack, entry, + repo_settings_get_big_file_threshold(the_repository))) entry->no_try_delta = 1; display_progress(progress_state, i + 1); } diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 8383bcf404957d..e20c120913e5e6 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -553,7 +553,8 @@ static void unpack_one(unsigned nr) switch (type) { case OBJ_BLOB: - if (!dry_run && size > big_file_threshold) { + if (!dry_run && + size > 
repo_settings_get_big_file_threshold(the_repository)) { stream_blob(size, nr); return; } diff --git a/config.c b/config.c index dfd03b9421cdf9..dc95608f749fb1 100644 --- a/config.c +++ b/config.c @@ -1490,11 +1490,6 @@ static int git_default_core_config(const char *var, const char *value, return 0; } - if (!strcmp(var, "core.bigfilethreshold")) { - big_file_threshold = git_config_ulong(var, value, ctx->kvi); - return 0; - } - if (!strcmp(var, "core.autocrlf")) { if (value && !strcasecmp(value, "input")) { auto_crlf = AUTO_CRLF_INPUT; diff --git a/diff.c b/diff.c index c89c15d98e0e29..4f802b4251c712 100644 --- a/diff.c +++ b/diff.c @@ -4193,7 +4193,8 @@ int diff_populate_filespec(struct repository *r, * is probably fine. */ if (check_binary && - s->size > big_file_threshold && s->is_binary == -1) { + s->size > repo_settings_get_big_file_threshold(the_repository) && + s->is_binary == -1) { s->is_binary = 1; return 0; } @@ -4243,7 +4244,8 @@ int diff_populate_filespec(struct repository *r, if (size_only || check_binary) { if (size_only) return 0; - if (s->size > big_file_threshold && s->is_binary == -1) { + if (s->size > repo_settings_get_big_file_threshold(the_repository) && + s->is_binary == -1) { s->is_binary = 1; return 0; } diff --git a/environment.c b/environment.c index 9e4c7781be049a..3c32367c28c30b 100644 --- a/environment.c +++ b/environment.c @@ -49,7 +49,6 @@ int fsync_object_files = -1; int use_fsync = -1; enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT; enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT; -unsigned long big_file_threshold = 512 * 1024 * 1024; char *editor_program; char *askpass_program; char *excludes_file; diff --git a/environment.h b/environment.h index 45e690f203fd1d..91d854dcb087a8 100644 --- a/environment.h +++ b/environment.h @@ -154,7 +154,6 @@ extern int zlib_compression_level; extern int pack_compression_level; extern size_t packed_git_window_size; extern size_t packed_git_limit; -extern unsigned long 
big_file_threshold; extern unsigned long pack_size_limit_cfg; extern int max_allowed_tree_depth; diff --git a/object-file.c b/object-file.c index 726e41a0475b43..b3e0276b2a45e1 100644 --- a/object-file.c +++ b/object-file.c @@ -2803,7 +2803,8 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_stream_convert_blob(istate, oid, fd, path, flags); else if (!S_ISREG(st->st_mode)) ret = index_pipe(istate, oid, fd, type, path, flags); - else if (st->st_size <= big_file_threshold || type != OBJ_BLOB || + else if (st->st_size <= repo_settings_get_big_file_threshold(the_repository) || + type != OBJ_BLOB || (path && would_convert_to_git(istate, path))) ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); @@ -3137,7 +3138,8 @@ int read_loose_object(const char *path, goto out; } - if (*oi->typep == OBJ_BLOB && *size > big_file_threshold) { + if (*oi->typep == OBJ_BLOB && + *size > repo_settings_get_big_file_threshold(the_repository)) { if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0) goto out; } else { diff --git a/pack-check.c b/pack-check.c index 6bcadc1e679590..017dc740f7b8cb 100644 --- a/pack-check.c +++ b/pack-check.c @@ -131,7 +131,8 @@ static int verify_packfile(struct repository *r, type = unpack_object_header(p, w_curs, &curpos, &size); unuse_pack(w_curs); - if (type == OBJ_BLOB && big_file_threshold <= size) { + if (type == OBJ_BLOB && + repo_settings_get_big_file_threshold(the_repository) <= size) { /* * Let stream_object_signature() check it with * the streaming interface; no point slurping diff --git a/repo-settings.c b/repo-settings.c index 67e9cfd2e63d9c..4129f8fb2b43a3 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -20,6 +20,13 @@ static void repo_cfg_int(struct repository *r, const char *key, int *dest, *dest = def; } +static void repo_cfg_ulong(struct repository *r, const char *key, unsigned long *dest, + unsigned long def) +{ + if (repo_config_get_ulong(r, key, dest)) + *dest = def; +} 
+ void prepare_repo_settings(struct repository *r) { int experimental; @@ -151,6 +158,19 @@ void repo_settings_clear(struct repository *r) r->settings = empty; } +unsigned long repo_settings_get_big_file_threshold(struct repository *repo) +{ + if (!repo->settings.big_file_threshold) + repo_cfg_ulong(repo, "core.bigfilethreshold", + &repo->settings.big_file_threshold, 512 * 1024 * 1024); + return repo->settings.big_file_threshold; +} + +void repo_settings_set_big_file_threshold(struct repository *repo, unsigned long value) +{ + repo->settings.big_file_threshold = value; +} + enum log_refs_config repo_settings_get_log_all_ref_updates(struct repository *repo) { const char *value; diff --git a/repo-settings.h b/repo-settings.h index ddc11967e015df..2bf24b259735c2 100644 --- a/repo-settings.h +++ b/repo-settings.h @@ -64,6 +64,7 @@ struct repo_settings { size_t delta_base_cache_limit; size_t packed_git_window_size; size_t packed_git_limit; + unsigned long big_file_threshold; char *hooks_path; }; @@ -88,6 +89,10 @@ int repo_settings_get_warn_ambiguous_refs(struct repository *repo); /* Read the value for "core.hooksPath". */ const char *repo_settings_get_hooks_path(struct repository *repo); +/* Read and set the value for "core.bigFileThreshold". */ +unsigned long repo_settings_get_big_file_threshold(struct repository *repo); +void repo_settings_set_big_file_threshold(struct repository *repo, unsigned long value); + /* Read, set or reset the value for "core.sharedRepository". 
*/ int repo_settings_get_shared_repository(struct repository *repo); void repo_settings_set_shared_repository(struct repository *repo, int value); diff --git a/streaming.c b/streaming.c index 38839511afc192..018b794d252819 100644 --- a/streaming.c +++ b/streaming.c @@ -431,7 +431,8 @@ static int istream_source(struct git_istream *st, st->open = open_istream_loose; return 0; case OI_PACKED: - if (!oi.u.packed.is_delta && big_file_threshold < size) { + if (!oi.u.packed.is_delta && + repo_settings_get_big_file_threshold(the_repository) < size) { st->u.in_pack.pack = oi.u.packed.pack; st->u.in_pack.pos = oi.u.packed.offset; st->open = open_istream_pack_non_delta; diff --git a/t/t1050-large.sh b/t/t1050-large.sh index c71932b0242373..5be273611ad850 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -6,7 +6,8 @@ test_description='adding and checking out large blobs' . ./test-lib.sh test_expect_success 'core.bigFileThreshold must be non-negative' ' - test_must_fail git -c core.bigFileThreshold=-1 rev-parse >out 2>err && + : >input && + test_must_fail git -c core.bigFileThreshold=-1 hash-object input >out 2>err && grep "bad numeric config value" err && test_must_be_empty out ' From 7ebf19ce55ebfddd152aab6ddcc6559bba378aec Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:24 +0100 Subject: [PATCH 073/775] pack-check: stop depending on `the_repository` There are multiple sites in "pack-check.c" where we use the global `the_repository` variable, either explicitly or implicitly by using `the_hash_algo`. In all of those cases we already have a repository available in the calling context though. Refactor the code to instead use the caller-provided repository and remove the `USE_THE_REPOSITORY_VARIABLE` define. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pack-check.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pack-check.c b/pack-check.c index 017dc740f7b8cb..95dcbbe985265a 100644 --- a/pack-check.c +++ b/pack-check.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -44,7 +43,7 @@ int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, } while (len); index_crc = p->index_data; - index_crc += 2 + 256 + (size_t)p->num_objects * (the_hash_algo->rawsz/4) + nr; + index_crc += 2 + 256 + (size_t)p->num_objects * (p->repo->hash_algo->rawsz/4) + nr; return data_crc != ntohl(*index_crc); } @@ -81,11 +80,11 @@ static int verify_packfile(struct repository *r, } while (offset < pack_sig_ofs); git_hash_final(hash, &ctx); pack_sig = use_pack(p, w_curs, pack_sig_ofs, NULL); - if (!hasheq(hash, pack_sig, the_repository->hash_algo)) + if (!hasheq(hash, pack_sig, r->hash_algo)) err = error("%s pack checksum mismatch", p->pack_name); if (!hasheq(index_base + index_size - r->hash_algo->hexsz, pack_sig, - the_repository->hash_algo)) + r->hash_algo)) err = error("%s pack checksum does not match its index", p->pack_name); unuse_pack(w_curs); @@ -132,7 +131,7 @@ static int verify_packfile(struct repository *r, unuse_pack(w_curs); if (type == OBJ_BLOB && - repo_settings_get_big_file_threshold(the_repository) <= size) { + repo_settings_get_big_file_threshold(r) <= size) { /* * Let stream_object_signature() check it with * the streaming interface; no point slurping @@ -181,7 +180,7 @@ int verify_pack_index(struct packed_git *p) return error("packfile %s index not opened", p->pack_name); /* Verify SHA1 sum of the index file */ - if (!hashfile_checksum_valid(the_repository->hash_algo, p->index_data, p->index_size)) + if (!hashfile_checksum_valid(p->repo->hash_algo, p->index_data, p->index_size)) err = error("Packfile index for %s hash mismatch", 
p->pack_name); return err; From e5957ca321802339c9710acadae2138262701e5d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:25 +0100 Subject: [PATCH 074/775] pack-revindex: stop depending on `the_repository` There are multiple sites in "pack-revindex.c" where we use the global `the_repository` variable, either explicitly or implicitly by using `the_hash_algo`. In all of those cases we already have a repository available in the calling context though. Refactor the code to instead use the caller-provided repository and remove the `USE_THE_REPOSITORY_VARIABLE` define. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pack-revindex.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/pack-revindex.c b/pack-revindex.c index 78139e3d7f5c9f..038e0c96b1ca5e 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "gettext.h" #include "pack-revindex.h" @@ -9,6 +7,7 @@ #include "strbuf.h" #include "trace2.h" #include "parse.h" +#include "repository.h" #include "midx.h" #include "csum-file.h" @@ -137,7 +136,7 @@ static void create_pack_revindex(struct packed_git *p) const unsigned num_ent = p->num_objects; unsigned i; const char *index = p->index_data; - const unsigned hashsz = the_hash_algo->rawsz; + const unsigned hashsz = p->repo->hash_algo->rawsz; ALLOC_ARRAY(p->revindex, num_ent + 1); index += 4 * 256; @@ -193,7 +192,11 @@ static char *pack_revindex_filename(struct packed_git *p) } #define RIDX_HEADER_SIZE (12) -#define RIDX_MIN_SIZE (RIDX_HEADER_SIZE + (2 * the_hash_algo->rawsz)) + +static size_t ridx_min_size(const struct git_hash_algo *algo) +{ + return RIDX_HEADER_SIZE + (2 * algo->rawsz); +} struct revindex_header { uint32_t signature; @@ -201,7 +204,8 @@ struct revindex_header { uint32_t hash_id; }; -static int load_revindex_from_disk(char *revindex_name, +static int 
load_revindex_from_disk(const struct git_hash_algo *algo, + char *revindex_name, uint32_t num_objects, const uint32_t **data_p, size_t *len_p) { @@ -228,12 +232,12 @@ static int load_revindex_from_disk(char *revindex_name, revindex_size = xsize_t(st.st_size); - if (revindex_size < RIDX_MIN_SIZE) { + if (revindex_size < ridx_min_size(algo)) { ret = error(_("reverse-index file %s is too small"), revindex_name); goto cleanup; } - if (revindex_size - RIDX_MIN_SIZE != st_mult(sizeof(uint32_t), num_objects)) { + if (revindex_size - ridx_min_size(algo) != st_mult(sizeof(uint32_t), num_objects)) { ret = error(_("reverse-index file %s is corrupt"), revindex_name); goto cleanup; } @@ -279,7 +283,8 @@ int load_pack_revindex_from_disk(struct packed_git *p) revindex_name = pack_revindex_filename(p); - ret = load_revindex_from_disk(revindex_name, + ret = load_revindex_from_disk(p->repo->hash_algo, + revindex_name, p->num_objects, &p->revindex_map, &p->revindex_size); @@ -322,7 +327,7 @@ int verify_pack_revindex(struct packed_git *p) if (!p->revindex_map || !p->revindex_data) return res; - if (!hashfile_checksum_valid(the_repository->hash_algo, + if (!hashfile_checksum_valid(p->repo->hash_algo, (const unsigned char *)p->revindex_map, p->revindex_size)) { error(_("invalid checksum")); res = -1; @@ -375,19 +380,20 @@ int load_midx_revindex(struct multi_pack_index *m) * not want to accidentally call munmap() in the middle of the * MIDX. 
*/ - trace2_data_string("load_midx_revindex", the_repository, + trace2_data_string("load_midx_revindex", m->repo, "source", "midx"); m->revindex_data = (const uint32_t *)m->chunk_revindex; return 0; } - trace2_data_string("load_midx_revindex", the_repository, + trace2_data_string("load_midx_revindex", m->repo, "source", "rev"); get_midx_filename_ext(m->repo->hash_algo, &revindex_name, m->object_dir, get_midx_checksum(m), MIDX_EXT_REV); - ret = load_revindex_from_disk(revindex_name.buf, + ret = load_revindex_from_disk(m->repo->hash_algo, + revindex_name.buf, m->num_objects, &m->revindex_map, &m->revindex_len); @@ -419,7 +425,7 @@ int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos) { unsigned lo, hi; - if (load_pack_revindex(the_repository, p) < 0) + if (load_pack_revindex(p->repo, p) < 0) return -1; lo = 0; @@ -465,7 +471,7 @@ off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos) if (p->revindex) return p->revindex[pos].offset; else if (pos == p->num_objects) - return p->pack_size - the_hash_algo->rawsz; + return p->pack_size - p->repo->hash_algo->rawsz; else return nth_packed_object_offset(p, pack_pos_to_index(p, pos)); } From 1a6768d1dd6d92b34f25507091ff0775371e19cb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:26 +0100 Subject: [PATCH 075/775] pack-bitmap-write: stop depending on `the_repository` There are multiple sites in "pack-bitmap-write.c" where we use the global `the_repository` variable, either explicitly or implicitly by using `the_hash_algo`. Refactor the code so that the `struct bitmap_writer` stores the repository it is getting initialized with. Like this, we can adapt callsites that use `the_repository` to instead use the repository provided by the writer. Remove the `USE_THE_REPOSITORY_VARIABLE` define. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 36 ++++++++++++++++++------------------ pack-bitmap.h | 1 + 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 50e5c491ccb6af..6a97b52b36d424 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -48,6 +47,7 @@ void bitmap_writer_init(struct bitmap_writer *writer, struct repository *r, memset(writer, 0, sizeof(struct bitmap_writer)); if (writer->bitmaps) BUG("bitmap writer already initialized"); + writer->repo = r; writer->bitmaps = kh_init_oid_map(); writer->pseudo_merge_commits = kh_init_oid_map(); writer->to_pack = pdata; @@ -415,9 +415,9 @@ static void bitmap_builder_init(struct bitmap_builder *bb, bb->commits[bb->commits_nr++] = r->item; } - trace2_data_intmax("pack-bitmap-write", the_repository, + trace2_data_intmax("pack-bitmap-write", writer->repo, "num_selected_commits", writer->selected_nr); - trace2_data_intmax("pack-bitmap-write", the_repository, + trace2_data_intmax("pack-bitmap-write", writer->repo, "num_maximal_commits", num_maximal); release_revisions(&revs); @@ -460,7 +460,7 @@ static int fill_bitmap_tree(struct bitmap_writer *writer, switch (object_type(entry.mode)) { case OBJ_TREE: if (fill_bitmap_tree(writer, bitmap, - lookup_tree(the_repository, &entry.oid)) < 0) + lookup_tree(writer->repo, &entry.oid)) < 0) return -1; break; case OBJ_BLOB: @@ -536,7 +536,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer, return -1; bitmap_set(ent->bitmap, pos); prio_queue_put(tree_queue, - repo_get_commit_tree(the_repository, c)); + repo_get_commit_tree(writer->repo, c)); } for (p = c->parents; p; p = p->next) { @@ -590,11 +590,11 @@ int bitmap_writer_build(struct bitmap_writer *writer) int closed = 1; /* until proven otherwise */ if (writer->show_progress) - 
writer->progress = start_progress(the_repository, + writer->progress = start_progress(writer->repo, "Building bitmaps", writer->selected_nr); trace2_region_enter("pack-bitmap-write", "building_bitmaps_total", - the_repository); + writer->repo); old_bitmap = prepare_bitmap_git(writer->to_pack->repo); if (old_bitmap) @@ -645,10 +645,10 @@ int bitmap_writer_build(struct bitmap_writer *writer) free(mapping); trace2_region_leave("pack-bitmap-write", "building_bitmaps_total", - the_repository); - trace2_data_intmax("pack-bitmap-write", the_repository, + writer->repo); + trace2_data_intmax("pack-bitmap-write", writer->repo, "building_bitmaps_reused", reused_bitmaps_nr); - trace2_data_intmax("pack-bitmap-write", the_repository, + trace2_data_intmax("pack-bitmap-write", writer->repo, "building_bitmaps_pseudo_merge_reused", reused_pseudo_merge_bitmaps_nr); @@ -711,7 +711,7 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, } if (writer->show_progress) - writer->progress = start_progress(the_repository, + writer->progress = start_progress(writer->repo, "Selecting bitmap commits", 0); for (;;) { @@ -960,7 +960,7 @@ static void write_lookup_table(struct bitmap_writer *writer, struct hashfile *f, for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) table_inv[table[i]] = i; - trace2_region_enter("pack-bitmap-write", "writing_lookup_table", the_repository); + trace2_region_enter("pack-bitmap-write", "writing_lookup_table", writer->repo); for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) { struct bitmapped_commit *selected = &writer->selected[table[i]]; uint32_t xor_offset = selected->xor_offset; @@ -987,7 +987,7 @@ static void write_lookup_table(struct bitmap_writer *writer, struct hashfile *f, hashwrite_be64(f, (uint64_t)offsets[table[i]]); hashwrite_be32(f, xor_row); } - trace2_region_leave("pack-bitmap-write", "writing_lookup_table", the_repository); + trace2_region_leave("pack-bitmap-write", "writing_lookup_table", writer->repo); 
free(table); free(table_inv); @@ -1008,7 +1008,7 @@ static void write_hash_cache(struct hashfile *f, void bitmap_writer_set_checksum(struct bitmap_writer *writer, const unsigned char *sha1) { - hashcpy(writer->pack_checksum, sha1, the_repository->hash_algo); + hashcpy(writer->pack_checksum, sha1, writer->repo->hash_algo); } void bitmap_writer_finish(struct bitmap_writer *writer, @@ -1030,15 +1030,15 @@ void bitmap_writer_finish(struct bitmap_writer *writer, if (writer->pseudo_merges_nr) options |= BITMAP_OPT_PSEUDO_MERGES; - f = hashfd(the_repository->hash_algo, fd, tmp_file.buf); + f = hashfd(writer->repo->hash_algo, fd, tmp_file.buf); memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)); header.version = htons(default_version); header.options = htons(flags | options); header.entry_count = htonl(bitmap_writer_nr_selected_commits(writer)); - hashcpy(header.checksum, writer->pack_checksum, the_repository->hash_algo); + hashcpy(header.checksum, writer->pack_checksum, writer->repo->hash_algo); - hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz); + hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + writer->repo->hash_algo->rawsz); dump_bitmap(f, writer->commits); dump_bitmap(f, writer->trees); dump_bitmap(f, writer->blobs); @@ -1072,7 +1072,7 @@ void bitmap_writer_finish(struct bitmap_writer *writer, finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); - if (adjust_shared_perm(the_repository, tmp_file.buf)) + if (adjust_shared_perm(writer->repo, tmp_file.buf)) die_errno("unable to make temporary bitmap file readable"); if (rename(tmp_file.buf, filename)) diff --git a/pack-bitmap.h b/pack-bitmap.h index d7f4b8b8e95f85..53cd42772f3276 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -104,6 +104,7 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_i off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *); struct 
bitmap_writer { + struct repository *repo; struct ewah_bitmap *commits; struct ewah_bitmap *trees; struct ewah_bitmap *blobs; From f6e174b2d8b83c17376b163fc4f8062fa3140a04 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:27 +0100 Subject: [PATCH 076/775] object-file-convert: stop depending on `the_repository` There are multiple sites in "object-file-convert.c" where we use the global `the_repository` variable, either explicitly or implicitly by using `the_hash_algo`. All of these callsites are transitively called from `convert_object_file()`, which indeed has no repo as input. Refactor the function so that it receives a repository as a parameter and pass it through to all internal functions to get rid of the dependency. Remove the `USE_THE_REPOSITORY_VARIABLE` define. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/tag.c | 2 +- commit.c | 2 +- object-file-convert.c | 29 ++++++++++++++++------------- object-file-convert.h | 3 ++- object-file.c | 7 ++++--- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/builtin/tag.c b/builtin/tag.c index d3e0943b734759..7c173535cb320f 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -172,7 +172,7 @@ static int do_sign(struct strbuf *buffer, struct object_id **compat_oid, if (compat) { const struct git_hash_algo *algo = the_repository->hash_algo; - if (convert_object_file(&compat_buf, algo, compat, + if (convert_object_file(the_repository ,&compat_buf, algo, compat, buffer->buf, buffer->len, OBJ_TAG, 1)) goto out; if (sign_buffer(&compat_buf, &compat_sig, keyid)) diff --git a/commit.c b/commit.c index 6efdb03997d9a0..48aeefaad31709 100644 --- a/commit.c +++ b/commit.c @@ -1380,7 +1380,7 @@ static int convert_commit_extra_headers(const struct commit_extra_header *orig, struct commit_extra_header *new; CALLOC_ARRAY(new, 1); if (!strcmp(orig->key, "mergetag")) { - if (convert_object_file(&out, algo, compat, + if (convert_object_file(the_repository, &out, algo, 
compat, orig->value, orig->len, OBJ_TAG, 1)) { free(new); diff --git a/object-file-convert.c b/object-file-convert.c index eba71955cf7b88..7ab875afe6cb3a 100644 --- a/object-file-convert.c +++ b/object-file-convert.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -63,7 +62,8 @@ static int decode_tree_entry_raw(struct object_id *oid, const char **path, return 0; } -static int convert_tree_object(struct strbuf *out, +static int convert_tree_object(struct repository *repo, + struct strbuf *out, const struct git_hash_algo *from, const struct git_hash_algo *to, const char *buffer, size_t size) @@ -78,7 +78,7 @@ static int convert_tree_object(struct strbuf *out, if (decode_tree_entry_raw(&entry_oid, &path, &pathlen, from, p, end - p)) return error(_("failed to decode tree entry")); - if (repo_oid_to_algop(the_repository, &entry_oid, to, &mapped_oid)) + if (repo_oid_to_algop(repo, &entry_oid, to, &mapped_oid)) return error(_("failed to map tree entry for %s"), oid_to_hex(&entry_oid)); strbuf_add(out, p, path - p); strbuf_add(out, path, pathlen); @@ -88,7 +88,8 @@ static int convert_tree_object(struct strbuf *out, return 0; } -static int convert_tag_object(struct strbuf *out, +static int convert_tag_object(struct repository *repo, + struct strbuf *out, const struct git_hash_algo *from, const struct git_hash_algo *to, const char *buffer, size_t size) @@ -105,7 +106,7 @@ static int convert_tag_object(struct strbuf *out, return error("bogus tag object"); if (parse_oid_hex_algop(buffer + 7, &oid, &p, from) < 0) return error("bad tag object ID"); - if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + if (repo_oid_to_algop(repo, &oid, to, &mapped_oid)) return error("unable to map tree %s in tag object", oid_to_hex(&oid)); size -= ((p + 1) - buffer); @@ -139,7 +140,8 @@ static int convert_tag_object(struct strbuf *out, return 0; } -static int convert_commit_object(struct strbuf *out, +static 
int convert_commit_object(struct repository *repo, + struct strbuf *out, const struct git_hash_algo *from, const struct git_hash_algo *to, const char *buffer, size_t size) @@ -165,7 +167,7 @@ static int convert_commit_object(struct strbuf *out, (p != eol)) return error(_("bad %s in commit"), "tree"); - if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + if (repo_oid_to_algop(repo, &oid, to, &mapped_oid)) return error(_("unable to map %s %s in commit object"), "tree", oid_to_hex(&oid)); strbuf_addf(out, "tree %s\n", oid_to_hex(&mapped_oid)); @@ -177,7 +179,7 @@ static int convert_commit_object(struct strbuf *out, (p != eol)) return error(_("bad %s in commit"), "parent"); - if (repo_oid_to_algop(the_repository, &oid, to, &mapped_oid)) + if (repo_oid_to_algop(repo, &oid, to, &mapped_oid)) return error(_("unable to map %s %s in commit object"), "parent", oid_to_hex(&oid)); @@ -202,7 +204,7 @@ static int convert_commit_object(struct strbuf *out, } /* Compute the new tag object */ - if (convert_tag_object(&new_tag, from, to, tag.buf, tag.len)) { + if (convert_tag_object(repo, &new_tag, from, to, tag.buf, tag.len)) { strbuf_release(&tag); strbuf_release(&new_tag); return -1; @@ -241,7 +243,8 @@ static int convert_commit_object(struct strbuf *out, return 0; } -int convert_object_file(struct strbuf *outbuf, +int convert_object_file(struct repository *repo, + struct strbuf *outbuf, const struct git_hash_algo *from, const struct git_hash_algo *to, const void *buf, size_t len, @@ -256,13 +259,13 @@ int convert_object_file(struct strbuf *outbuf, switch (type) { case OBJ_COMMIT: - ret = convert_commit_object(outbuf, from, to, buf, len); + ret = convert_commit_object(repo, outbuf, from, to, buf, len); break; case OBJ_TREE: - ret = convert_tree_object(outbuf, from, to, buf, len); + ret = convert_tree_object(repo, outbuf, from, to, buf, len); break; case OBJ_TAG: - ret = convert_tag_object(outbuf, from, to, buf, len); + ret = convert_tag_object(repo, outbuf, from, to, 
buf, len); break; default: /* Not implemented yet, so fail. */ diff --git a/object-file-convert.h b/object-file-convert.h index a4f802aa8eeae4..9b3cc5e533dda5 100644 --- a/object-file-convert.h +++ b/object-file-convert.h @@ -14,7 +14,8 @@ int repo_oid_to_algop(struct repository *repo, const struct object_id *src, * Convert an object file from one hash algorithm to another algorithm. * Return -1 on failure, 0 on success. */ -int convert_object_file(struct strbuf *outbuf, +int convert_object_file(struct repository *repo, + struct strbuf *outbuf, const struct git_hash_algo *from, const struct git_hash_algo *to, const void *buf, size_t len, diff --git a/object-file.c b/object-file.c index b3e0276b2a45e1..b0e237a2acccc9 100644 --- a/object-file.c +++ b/object-file.c @@ -1793,7 +1793,7 @@ static int oid_object_info_convert(struct repository *r, if (type == -1) return -1; if (type != OBJ_BLOB) { - ret = convert_object_file(&outbuf, + ret = convert_object_file(the_repository, &outbuf, the_hash_algo, input_algo, content, size, type, !do_die); free(content); @@ -2510,7 +2510,7 @@ int write_object_file_flags(const void *buf, unsigned long len, hash_object_file(compat, buf, len, type, &compat_oid); else { struct strbuf converted = STRBUF_INIT; - convert_object_file(&converted, algo, compat, + convert_object_file(the_repository, &converted, algo, compat, buf, len, type, 0); hash_object_file(compat, converted.buf, converted.len, type, &compat_oid); @@ -2550,7 +2550,8 @@ int write_object_file_literally(const void *buf, unsigned long len, &compat_oid); else if (compat_type != -1) { struct strbuf converted = STRBUF_INIT; - convert_object_file(&converted, algo, compat, + convert_object_file(the_repository, + &converted, algo, compat, buf, len, compat_type, 0); hash_object_file(compat, converted.buf, converted.len, compat_type, &compat_oid); From 19be71db9c3faafc113c16b4ca9fc30281baf1ce Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:28 +0100 Subject: 
[PATCH 077/775] delta-islands: stop depending on `the_repository` There are multiple sites in "delta-islands.c" where we use the global `the_repository` variable, either explicitly or implicitly by using `the_hash_algo`. Refactor the code to stop using `the_repository`. In most cases this is trivial because we already had a repository available in the calling context, with the only exception being `propagate_island_marks()`. Adapt it so that the repository gets passed in via a parameter. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 +- delta-islands.c | 14 ++++++-------- delta-islands.h | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index eefc6c7809607f..fba5db74656735 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3847,7 +3847,7 @@ static void show_commit(struct commit *commit, void *data UNUSED) index_commit_for_bitmap(commit); if (use_delta_islands) - propagate_island_marks(commit); + propagate_island_marks(the_repository, commit); } static void show_object(struct object *obj, const char *name, diff --git a/delta-islands.c b/delta-islands.c index 3aec43fada36f7..36c94799d69d7a 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -267,8 +266,7 @@ void resolve_tree_islands(struct repository *r, QSORT(todo, nr, tree_depth_compare); if (progress) - progress_state = start_progress(the_repository, - _("Propagating island marks"), nr); + progress_state = start_progress(r, _("Propagating island marks"), nr); for (i = 0; i < nr; i++) { struct object_entry *ent = todo[i].entry; @@ -490,9 +488,9 @@ void load_delta_islands(struct repository *r, int progress) island_marks = kh_init_oid_map(); - git_config(island_config_callback, &ild); + repo_config(r, island_config_callback, &ild); ild.remote_islands = 
kh_init_str(); - refs_for_each_ref(get_main_ref_store(the_repository), + refs_for_each_ref(get_main_ref_store(r), find_island_for_ref, &ild); free_config_regexes(&ild); deduplicate_islands(ild.remote_islands, r); @@ -502,7 +500,7 @@ void load_delta_islands(struct repository *r, int progress) fprintf(stderr, _("Marked %d islands, done.\n"), island_counter); } -void propagate_island_marks(struct commit *commit) +void propagate_island_marks(struct repository *r, struct commit *commit) { khiter_t pos = kh_get_oid_map(island_marks, commit->object.oid); @@ -510,8 +508,8 @@ void propagate_island_marks(struct commit *commit) struct commit_list *p; struct island_bitmap *root_marks = kh_value(island_marks, pos); - repo_parse_commit(the_repository, commit); - set_island_marks(&repo_get_commit_tree(the_repository, commit)->object, + repo_parse_commit(r, commit); + set_island_marks(&repo_get_commit_tree(r, commit)->object, root_marks); for (p = commit->parents; p; p = p->next) set_island_marks(&p->item->object, root_marks); diff --git a/delta-islands.h b/delta-islands.h index 8d1591ae28be30..6107660306bd55 100644 --- a/delta-islands.h +++ b/delta-islands.h @@ -12,7 +12,7 @@ void resolve_tree_islands(struct repository *r, int progress, struct packing_data *to_pack); void load_delta_islands(struct repository *r, int progress); -void propagate_island_marks(struct commit *commit); +void propagate_island_marks(struct repository *r, struct commit *commit); int compute_pack_layers(struct packing_data *to_pack); void free_island_marks(void); From 172d0f686b07a73cba2d307b2abf08e2819a77e1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:29 +0100 Subject: [PATCH 078/775] object-file: split out logic regarding hash algorithms While we have a "hash.h" header, the actual implementation of the subsystem is hosted by "object-file.c". 
This makes it harder than necessary to find the actual implementation of the hash subsystem and intermingles the different concerns with one another. Split out the implementation of hash algorithms into a new, separate "hash.c" file. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + hash.c | 283 ++++++++++++++++++++++++++++++++++++++++++++++++++ meson.build | 1 + object-file.c | 277 ------------------------------------------------ 4 files changed, 285 insertions(+), 277 deletions(-) create mode 100644 hash.c diff --git a/Makefile b/Makefile index 4a7fb1537ed879..525d2b02ecb78f 100644 --- a/Makefile +++ b/Makefile @@ -1041,6 +1041,7 @@ LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o LIB_OBJS += hash-lookup.o +LIB_OBJS += hash.o LIB_OBJS += hashmap.o LIB_OBJS += help.o LIB_OBJS += hex.o diff --git a/hash.c b/hash.c new file mode 100644 index 00000000000000..dd5ac9d0eb247f --- /dev/null +++ b/hash.c @@ -0,0 +1,283 @@ +#define USE_THE_REPOSITORY_VARIABLE +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include "hash.h" +#include "hex.h" + +static const struct object_id empty_tree_oid = { + .hash = { + 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60, + 0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04 + }, + .algo = GIT_HASH_SHA1, +}; +static const struct object_id empty_blob_oid = { + .hash = { + 0xe6, 0x9d, 0xe2, 0x9b, 0xb2, 0xd1, 0xd6, 0x43, 0x4b, 0x8b, + 0x29, 0xae, 0x77, 0x5a, 0xd8, 0xc2, 0xe4, 0x8c, 0x53, 0x91 + }, + .algo = GIT_HASH_SHA1, +}; +static const struct object_id null_oid_sha1 = { + .hash = {0}, + .algo = GIT_HASH_SHA1, +}; +static const struct object_id empty_tree_oid_sha256 = { + .hash = { + 0x6e, 0xf1, 0x9b, 0x41, 0x22, 0x5c, 0x53, 0x69, 0xf1, 0xc1, + 0x04, 0xd4, 0x5d, 0x8d, 0x85, 0xef, 0xa9, 0xb0, 0x57, 0xb5, + 0x3b, 0x14, 0xb4, 0xb9, 0xb9, 0x39, 0xdd, 0x74, 0xde, 0xcc, + 0x53, 0x21 + }, + .algo = GIT_HASH_SHA256, +}; +static const struct object_id 
empty_blob_oid_sha256 = { + .hash = { + 0x47, 0x3a, 0x0f, 0x4c, 0x3b, 0xe8, 0xa9, 0x36, 0x81, 0xa2, + 0x67, 0xe3, 0xb1, 0xe9, 0xa7, 0xdc, 0xda, 0x11, 0x85, 0x43, + 0x6f, 0xe1, 0x41, 0xf7, 0x74, 0x91, 0x20, 0xa3, 0x03, 0x72, + 0x18, 0x13 + }, + .algo = GIT_HASH_SHA256, +}; +static const struct object_id null_oid_sha256 = { + .hash = {0}, + .algo = GIT_HASH_SHA256, +}; + +static void git_hash_sha1_init(struct git_hash_ctx *ctx) +{ + ctx->algop = &hash_algos[GIT_HASH_SHA1]; + git_SHA1_Init(&ctx->state.sha1); +} + +static void git_hash_sha1_clone(struct git_hash_ctx *dst, const struct git_hash_ctx *src) +{ + dst->algop = src->algop; + git_SHA1_Clone(&dst->state.sha1, &src->state.sha1); +} + +static void git_hash_sha1_update(struct git_hash_ctx *ctx, const void *data, size_t len) +{ + git_SHA1_Update(&ctx->state.sha1, data, len); +} + +static void git_hash_sha1_final(unsigned char *hash, struct git_hash_ctx *ctx) +{ + git_SHA1_Final(hash, &ctx->state.sha1); +} + +static void git_hash_sha1_final_oid(struct object_id *oid, struct git_hash_ctx *ctx) +{ + git_SHA1_Final(oid->hash, &ctx->state.sha1); + memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ); + oid->algo = GIT_HASH_SHA1; +} + +static void git_hash_sha1_init_unsafe(struct git_hash_ctx *ctx) +{ + ctx->algop = unsafe_hash_algo(&hash_algos[GIT_HASH_SHA1]); + git_SHA1_Init_unsafe(&ctx->state.sha1_unsafe); +} + +static void git_hash_sha1_clone_unsafe(struct git_hash_ctx *dst, const struct git_hash_ctx *src) +{ + dst->algop = src->algop; + git_SHA1_Clone_unsafe(&dst->state.sha1_unsafe, &src->state.sha1_unsafe); +} + +static void git_hash_sha1_update_unsafe(struct git_hash_ctx *ctx, const void *data, + size_t len) +{ + git_SHA1_Update_unsafe(&ctx->state.sha1_unsafe, data, len); +} + +static void git_hash_sha1_final_unsafe(unsigned char *hash, struct git_hash_ctx *ctx) +{ + git_SHA1_Final_unsafe(hash, &ctx->state.sha1_unsafe); +} + +static void git_hash_sha1_final_oid_unsafe(struct object_id *oid, struct 
git_hash_ctx *ctx) +{ + git_SHA1_Final_unsafe(oid->hash, &ctx->state.sha1_unsafe); + memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ); + oid->algo = GIT_HASH_SHA1; +} + +static void git_hash_sha256_init(struct git_hash_ctx *ctx) +{ + ctx->algop = unsafe_hash_algo(&hash_algos[GIT_HASH_SHA256]); + git_SHA256_Init(&ctx->state.sha256); +} + +static void git_hash_sha256_clone(struct git_hash_ctx *dst, const struct git_hash_ctx *src) +{ + dst->algop = src->algop; + git_SHA256_Clone(&dst->state.sha256, &src->state.sha256); +} + +static void git_hash_sha256_update(struct git_hash_ctx *ctx, const void *data, size_t len) +{ + git_SHA256_Update(&ctx->state.sha256, data, len); +} + +static void git_hash_sha256_final(unsigned char *hash, struct git_hash_ctx *ctx) +{ + git_SHA256_Final(hash, &ctx->state.sha256); +} + +static void git_hash_sha256_final_oid(struct object_id *oid, struct git_hash_ctx *ctx) +{ + git_SHA256_Final(oid->hash, &ctx->state.sha256); + /* + * This currently does nothing, so the compiler should optimize it out, + * but keep it in case we extend the hash size again. 
+ */ + memset(oid->hash + GIT_SHA256_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA256_RAWSZ); + oid->algo = GIT_HASH_SHA256; +} + +static void git_hash_unknown_init(struct git_hash_ctx *ctx UNUSED) +{ + BUG("trying to init unknown hash"); +} + +static void git_hash_unknown_clone(struct git_hash_ctx *dst UNUSED, + const struct git_hash_ctx *src UNUSED) +{ + BUG("trying to clone unknown hash"); +} + +static void git_hash_unknown_update(struct git_hash_ctx *ctx UNUSED, + const void *data UNUSED, + size_t len UNUSED) +{ + BUG("trying to update unknown hash"); +} + +static void git_hash_unknown_final(unsigned char *hash UNUSED, + struct git_hash_ctx *ctx UNUSED) +{ + BUG("trying to finalize unknown hash"); +} + +static void git_hash_unknown_final_oid(struct object_id *oid UNUSED, + struct git_hash_ctx *ctx UNUSED) +{ + BUG("trying to finalize unknown hash"); +} + +static const struct git_hash_algo sha1_unsafe_algo = { + .name = "sha1", + .format_id = GIT_SHA1_FORMAT_ID, + .rawsz = GIT_SHA1_RAWSZ, + .hexsz = GIT_SHA1_HEXSZ, + .blksz = GIT_SHA1_BLKSZ, + .init_fn = git_hash_sha1_init_unsafe, + .clone_fn = git_hash_sha1_clone_unsafe, + .update_fn = git_hash_sha1_update_unsafe, + .final_fn = git_hash_sha1_final_unsafe, + .final_oid_fn = git_hash_sha1_final_oid_unsafe, + .empty_tree = &empty_tree_oid, + .empty_blob = &empty_blob_oid, + .null_oid = &null_oid_sha1, +}; + +const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { + { + .name = NULL, + .format_id = 0x00000000, + .rawsz = 0, + .hexsz = 0, + .blksz = 0, + .init_fn = git_hash_unknown_init, + .clone_fn = git_hash_unknown_clone, + .update_fn = git_hash_unknown_update, + .final_fn = git_hash_unknown_final, + .final_oid_fn = git_hash_unknown_final_oid, + .empty_tree = NULL, + .empty_blob = NULL, + .null_oid = NULL, + }, + { + .name = "sha1", + .format_id = GIT_SHA1_FORMAT_ID, + .rawsz = GIT_SHA1_RAWSZ, + .hexsz = GIT_SHA1_HEXSZ, + .blksz = GIT_SHA1_BLKSZ, + .init_fn = git_hash_sha1_init, + .clone_fn = git_hash_sha1_clone, + 
.update_fn = git_hash_sha1_update, + .final_fn = git_hash_sha1_final, + .final_oid_fn = git_hash_sha1_final_oid, + .unsafe = &sha1_unsafe_algo, + .empty_tree = &empty_tree_oid, + .empty_blob = &empty_blob_oid, + .null_oid = &null_oid_sha1, + }, + { + .name = "sha256", + .format_id = GIT_SHA256_FORMAT_ID, + .rawsz = GIT_SHA256_RAWSZ, + .hexsz = GIT_SHA256_HEXSZ, + .blksz = GIT_SHA256_BLKSZ, + .init_fn = git_hash_sha256_init, + .clone_fn = git_hash_sha256_clone, + .update_fn = git_hash_sha256_update, + .final_fn = git_hash_sha256_final, + .final_oid_fn = git_hash_sha256_final_oid, + .empty_tree = &empty_tree_oid_sha256, + .empty_blob = &empty_blob_oid_sha256, + .null_oid = &null_oid_sha256, + } +}; + +const struct object_id *null_oid(void) +{ + return the_hash_algo->null_oid; +} + +const char *empty_tree_oid_hex(const struct git_hash_algo *algop) +{ + static char buf[GIT_MAX_HEXSZ + 1]; + return oid_to_hex_r(buf, algop->empty_tree); +} + +int hash_algo_by_name(const char *name) +{ + int i; + if (!name) + return GIT_HASH_UNKNOWN; + for (i = 1; i < GIT_HASH_NALGOS; i++) + if (!strcmp(name, hash_algos[i].name)) + return i; + return GIT_HASH_UNKNOWN; +} + +int hash_algo_by_id(uint32_t format_id) +{ + int i; + for (i = 1; i < GIT_HASH_NALGOS; i++) + if (format_id == hash_algos[i].format_id) + return i; + return GIT_HASH_UNKNOWN; +} + +int hash_algo_by_length(int len) +{ + int i; + for (i = 1; i < GIT_HASH_NALGOS; i++) + if (len == hash_algos[i].rawsz) + return i; + return GIT_HASH_UNKNOWN; +} + +const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) +{ + /* If we have a faster "unsafe" implementation, use that. */ + if (algop->unsafe) + return algop->unsafe; + /* Otherwise use the default one. 
*/ + return algop; +} diff --git a/meson.build b/meson.build index e86085b0a474a1..83e7fab4d51d73 100644 --- a/meson.build +++ b/meson.build @@ -310,6 +310,7 @@ libgit_sources = [ 'graph.c', 'grep.c', 'hash-lookup.c', + 'hash.c', 'hashmap.c', 'help.c', 'hex.c', diff --git a/object-file.c b/object-file.c index b0e237a2acccc9..de603e2ca8cdbf 100644 --- a/object-file.c +++ b/object-file.c @@ -45,283 +45,6 @@ /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 -static const struct object_id empty_tree_oid = { - .hash = { - 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60, - 0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04 - }, - .algo = GIT_HASH_SHA1, -}; -static const struct object_id empty_blob_oid = { - .hash = { - 0xe6, 0x9d, 0xe2, 0x9b, 0xb2, 0xd1, 0xd6, 0x43, 0x4b, 0x8b, - 0x29, 0xae, 0x77, 0x5a, 0xd8, 0xc2, 0xe4, 0x8c, 0x53, 0x91 - }, - .algo = GIT_HASH_SHA1, -}; -static const struct object_id null_oid_sha1 = { - .hash = {0}, - .algo = GIT_HASH_SHA1, -}; -static const struct object_id empty_tree_oid_sha256 = { - .hash = { - 0x6e, 0xf1, 0x9b, 0x41, 0x22, 0x5c, 0x53, 0x69, 0xf1, 0xc1, - 0x04, 0xd4, 0x5d, 0x8d, 0x85, 0xef, 0xa9, 0xb0, 0x57, 0xb5, - 0x3b, 0x14, 0xb4, 0xb9, 0xb9, 0x39, 0xdd, 0x74, 0xde, 0xcc, - 0x53, 0x21 - }, - .algo = GIT_HASH_SHA256, -}; -static const struct object_id empty_blob_oid_sha256 = { - .hash = { - 0x47, 0x3a, 0x0f, 0x4c, 0x3b, 0xe8, 0xa9, 0x36, 0x81, 0xa2, - 0x67, 0xe3, 0xb1, 0xe9, 0xa7, 0xdc, 0xda, 0x11, 0x85, 0x43, - 0x6f, 0xe1, 0x41, 0xf7, 0x74, 0x91, 0x20, 0xa3, 0x03, 0x72, - 0x18, 0x13 - }, - .algo = GIT_HASH_SHA256, -}; -static const struct object_id null_oid_sha256 = { - .hash = {0}, - .algo = GIT_HASH_SHA256, -}; - -static void git_hash_sha1_init(struct git_hash_ctx *ctx) -{ - ctx->algop = &hash_algos[GIT_HASH_SHA1]; - git_SHA1_Init(&ctx->state.sha1); -} - -static void git_hash_sha1_clone(struct git_hash_ctx *dst, const struct git_hash_ctx *src) -{ - dst->algop = src->algop; - 
git_SHA1_Clone(&dst->state.sha1, &src->state.sha1); -} - -static void git_hash_sha1_update(struct git_hash_ctx *ctx, const void *data, size_t len) -{ - git_SHA1_Update(&ctx->state.sha1, data, len); -} - -static void git_hash_sha1_final(unsigned char *hash, struct git_hash_ctx *ctx) -{ - git_SHA1_Final(hash, &ctx->state.sha1); -} - -static void git_hash_sha1_final_oid(struct object_id *oid, struct git_hash_ctx *ctx) -{ - git_SHA1_Final(oid->hash, &ctx->state.sha1); - memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ); - oid->algo = GIT_HASH_SHA1; -} - -static void git_hash_sha1_init_unsafe(struct git_hash_ctx *ctx) -{ - ctx->algop = unsafe_hash_algo(&hash_algos[GIT_HASH_SHA1]); - git_SHA1_Init_unsafe(&ctx->state.sha1_unsafe); -} - -static void git_hash_sha1_clone_unsafe(struct git_hash_ctx *dst, const struct git_hash_ctx *src) -{ - dst->algop = src->algop; - git_SHA1_Clone_unsafe(&dst->state.sha1_unsafe, &src->state.sha1_unsafe); -} - -static void git_hash_sha1_update_unsafe(struct git_hash_ctx *ctx, const void *data, - size_t len) -{ - git_SHA1_Update_unsafe(&ctx->state.sha1_unsafe, data, len); -} - -static void git_hash_sha1_final_unsafe(unsigned char *hash, struct git_hash_ctx *ctx) -{ - git_SHA1_Final_unsafe(hash, &ctx->state.sha1_unsafe); -} - -static void git_hash_sha1_final_oid_unsafe(struct object_id *oid, struct git_hash_ctx *ctx) -{ - git_SHA1_Final_unsafe(oid->hash, &ctx->state.sha1_unsafe); - memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ); - oid->algo = GIT_HASH_SHA1; -} - -static void git_hash_sha256_init(struct git_hash_ctx *ctx) -{ - ctx->algop = unsafe_hash_algo(&hash_algos[GIT_HASH_SHA256]); - git_SHA256_Init(&ctx->state.sha256); -} - -static void git_hash_sha256_clone(struct git_hash_ctx *dst, const struct git_hash_ctx *src) -{ - dst->algop = src->algop; - git_SHA256_Clone(&dst->state.sha256, &src->state.sha256); -} - -static void git_hash_sha256_update(struct git_hash_ctx *ctx, const void *data, size_t 
len) -{ - git_SHA256_Update(&ctx->state.sha256, data, len); -} - -static void git_hash_sha256_final(unsigned char *hash, struct git_hash_ctx *ctx) -{ - git_SHA256_Final(hash, &ctx->state.sha256); -} - -static void git_hash_sha256_final_oid(struct object_id *oid, struct git_hash_ctx *ctx) -{ - git_SHA256_Final(oid->hash, &ctx->state.sha256); - /* - * This currently does nothing, so the compiler should optimize it out, - * but keep it in case we extend the hash size again. - */ - memset(oid->hash + GIT_SHA256_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA256_RAWSZ); - oid->algo = GIT_HASH_SHA256; -} - -static void git_hash_unknown_init(struct git_hash_ctx *ctx UNUSED) -{ - BUG("trying to init unknown hash"); -} - -static void git_hash_unknown_clone(struct git_hash_ctx *dst UNUSED, - const struct git_hash_ctx *src UNUSED) -{ - BUG("trying to clone unknown hash"); -} - -static void git_hash_unknown_update(struct git_hash_ctx *ctx UNUSED, - const void *data UNUSED, - size_t len UNUSED) -{ - BUG("trying to update unknown hash"); -} - -static void git_hash_unknown_final(unsigned char *hash UNUSED, - struct git_hash_ctx *ctx UNUSED) -{ - BUG("trying to finalize unknown hash"); -} - -static void git_hash_unknown_final_oid(struct object_id *oid UNUSED, - struct git_hash_ctx *ctx UNUSED) -{ - BUG("trying to finalize unknown hash"); -} - -static const struct git_hash_algo sha1_unsafe_algo = { - .name = "sha1", - .format_id = GIT_SHA1_FORMAT_ID, - .rawsz = GIT_SHA1_RAWSZ, - .hexsz = GIT_SHA1_HEXSZ, - .blksz = GIT_SHA1_BLKSZ, - .init_fn = git_hash_sha1_init_unsafe, - .clone_fn = git_hash_sha1_clone_unsafe, - .update_fn = git_hash_sha1_update_unsafe, - .final_fn = git_hash_sha1_final_unsafe, - .final_oid_fn = git_hash_sha1_final_oid_unsafe, - .empty_tree = &empty_tree_oid, - .empty_blob = &empty_blob_oid, - .null_oid = &null_oid_sha1, -}; - -const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { - { - .name = NULL, - .format_id = 0x00000000, - .rawsz = 0, - .hexsz = 0, - .blksz = 0, - 
.init_fn = git_hash_unknown_init, - .clone_fn = git_hash_unknown_clone, - .update_fn = git_hash_unknown_update, - .final_fn = git_hash_unknown_final, - .final_oid_fn = git_hash_unknown_final_oid, - .empty_tree = NULL, - .empty_blob = NULL, - .null_oid = NULL, - }, - { - .name = "sha1", - .format_id = GIT_SHA1_FORMAT_ID, - .rawsz = GIT_SHA1_RAWSZ, - .hexsz = GIT_SHA1_HEXSZ, - .blksz = GIT_SHA1_BLKSZ, - .init_fn = git_hash_sha1_init, - .clone_fn = git_hash_sha1_clone, - .update_fn = git_hash_sha1_update, - .final_fn = git_hash_sha1_final, - .final_oid_fn = git_hash_sha1_final_oid, - .unsafe = &sha1_unsafe_algo, - .empty_tree = &empty_tree_oid, - .empty_blob = &empty_blob_oid, - .null_oid = &null_oid_sha1, - }, - { - .name = "sha256", - .format_id = GIT_SHA256_FORMAT_ID, - .rawsz = GIT_SHA256_RAWSZ, - .hexsz = GIT_SHA256_HEXSZ, - .blksz = GIT_SHA256_BLKSZ, - .init_fn = git_hash_sha256_init, - .clone_fn = git_hash_sha256_clone, - .update_fn = git_hash_sha256_update, - .final_fn = git_hash_sha256_final, - .final_oid_fn = git_hash_sha256_final_oid, - .empty_tree = &empty_tree_oid_sha256, - .empty_blob = &empty_blob_oid_sha256, - .null_oid = &null_oid_sha256, - } -}; - -const struct object_id *null_oid(void) -{ - return the_hash_algo->null_oid; -} - -const char *empty_tree_oid_hex(const struct git_hash_algo *algop) -{ - static char buf[GIT_MAX_HEXSZ + 1]; - return oid_to_hex_r(buf, algop->empty_tree); -} - -int hash_algo_by_name(const char *name) -{ - int i; - if (!name) - return GIT_HASH_UNKNOWN; - for (i = 1; i < GIT_HASH_NALGOS; i++) - if (!strcmp(name, hash_algos[i].name)) - return i; - return GIT_HASH_UNKNOWN; -} - -int hash_algo_by_id(uint32_t format_id) -{ - int i; - for (i = 1; i < GIT_HASH_NALGOS; i++) - if (format_id == hash_algos[i].format_id) - return i; - return GIT_HASH_UNKNOWN; -} - -int hash_algo_by_length(int len) -{ - int i; - for (i = 1; i < GIT_HASH_NALGOS; i++) - if (len == hash_algos[i].rawsz) - return i; - return GIT_HASH_UNKNOWN; -} - -const struct 
git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) -{ - /* If we have a faster "unsafe" implementation, use that. */ - if (algop->unsafe) - return algop->unsafe; - /* Otherwise use the default one. */ - return algop; -} - /* * This is meant to hold a *small* number of objects that you would * want repo_read_object_file() to be able to return, but yet you do not want From 8ca9fa60a63c97bb9397f5de294aef53df90e7a0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:30 +0100 Subject: [PATCH 079/775] hash: fix "-Wsign-compare" warnings There are a couple of trivial "-Wsign-compare" warnings in "hash.c". Fix them. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- hash.c | 12 ++++-------- hash.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/hash.c b/hash.c index dd5ac9d0eb247f..4d9f98f72c2fed 100644 --- a/hash.c +++ b/hash.c @@ -1,5 +1,4 @@ #define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" #include "hash.h" @@ -246,10 +245,9 @@ const char *empty_tree_oid_hex(const struct git_hash_algo *algop) int hash_algo_by_name(const char *name) { - int i; if (!name) return GIT_HASH_UNKNOWN; - for (i = 1; i < GIT_HASH_NALGOS; i++) + for (size_t i = 1; i < GIT_HASH_NALGOS; i++) if (!strcmp(name, hash_algos[i].name)) return i; return GIT_HASH_UNKNOWN; @@ -257,17 +255,15 @@ int hash_algo_by_name(const char *name) int hash_algo_by_id(uint32_t format_id) { - int i; - for (i = 1; i < GIT_HASH_NALGOS; i++) + for (size_t i = 1; i < GIT_HASH_NALGOS; i++) if (format_id == hash_algos[i].format_id) return i; return GIT_HASH_UNKNOWN; } -int hash_algo_by_length(int len) +int hash_algo_by_length(size_t len) { - int i; - for (i = 1; i < GIT_HASH_NALGOS; i++) + for (size_t i = 1; i < GIT_HASH_NALGOS; i++) if (len == hash_algos[i].rawsz) return i; return GIT_HASH_UNKNOWN; diff --git a/hash.h b/hash.h index 4367acfec5098a..55d67ed83f3ed9 100644 --- a/hash.h +++ 
b/hash.h @@ -325,7 +325,7 @@ int hash_algo_by_name(const char *name); /* Identical, except based on the format ID. */ int hash_algo_by_id(uint32_t format_id); /* Identical, except based on the length. */ -int hash_algo_by_length(int len); +int hash_algo_by_length(size_t len); /* Identical, except for a pointer to struct git_hash_algo. */ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) { From 7d70b29c4f0b2fd3c6698956d9fb4026632d9c6e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 10 Mar 2025 08:13:31 +0100 Subject: [PATCH 080/775] hash: stop depending on `the_repository` in `null_oid()` The `null_oid()` function returns the object ID that only consists of zeroes. Naturally, this ID also depends on the hash algorithm used, as the number of zeroes is different between SHA1 and SHA256. Consequently, the function returns the hash-algorithm-specific null object ID. This is currently done by depending on `the_hash_algo`, which implicitly makes us depend on `the_repository`. Refactor the function to instead pass in the hash algorithm for which we want to retrieve the null object ID. Adapt callsites accordingly by passing in `the_repository`, thus bubbling up the dependency on that global variable by one layer. There are a couple of trivial exceptions for subsystems that already got rid of `the_repository`. These subsystems instead use the repository that is available via the calling context: - "builtin/grep.c" - "grep.c" - "refs/debug.c" There are also two non-trivial exceptions: - "diff-no-index.c": Here we know that we may not have a repository initialized at all, so we cannot rely on `the_repository`. Instead, we adapt `diff_no_index()` to get a `struct git_hash_algo` as parameter. The only caller is located in "builtin/diff.c", where we know to call `repo_set_hash_algo()` in case we're running outside of a Git repository. Consequently, it is fine to continue passing `the_repository->hash_algo` even in this case. 
- "builtin/ls-files.c": There is an in-flight patch series that drops `USE_THE_REPOSITORY_VARIABLE` in this file, which causes a semantic conflict because we use `null_oid()` in `show_submodule()`. The value is passed to `repo_submodule_init()`, which may use the object ID to resolve a tree-ish in the superproject from which we want to read the submodule config. As such, the object ID should refer to an object in the superproject, and consequently we need to use its hash algorithm. This means that we could in theory just not bother about this edge case at all and just use `the_repository` in "diff-no-index.c". But doing so would feel misdesigned. Remove the `USE_THE_REPOSITORY_VARIABLE` preprocessor define in "hash.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive.c | 2 +- blame.c | 2 +- branch.c | 2 +- builtin/checkout.c | 6 ++-- builtin/clone.c | 2 +- builtin/describe.c | 2 +- builtin/diff.c | 5 +-- builtin/fast-export.c | 10 +++--- builtin/fsck.c | 2 +- builtin/grep.c | 2 +- builtin/ls-files.c | 3 +- builtin/rebase.c | 2 +- builtin/receive-pack.c | 2 +- builtin/submodule--helper.c | 36 ++++++++++---------- builtin/unpack-objects.c | 2 +- builtin/update-ref.c | 2 +- builtin/worktree.c | 2 +- combine-diff.c | 2 +- diff-lib.c | 10 +++--- diff-no-index.c | 28 +++++++-------- diff.c | 8 ++--- diff.h | 2 +- dir.c | 2 +- grep.c | 2 +- hash.c | 6 ++-- hash.h | 2 +- log-tree.c | 2 +- merge-ort.c | 26 +++++++------- merge-recursive.c | 12 +++---- notes-merge.c | 2 +- notes.c | 2 +- object-file.c | 2 +- parse-options-cb.c | 2 +- range-diff.c | 2 +- read-cache.c | 2 +- refs.c | 12 +++---- refs/debug.c | 2 +- refs/files-backend.c | 2 +- reset.c | 2 +- sequencer.c | 10 +++--- submodule-config.c | 2 +- submodule.c | 28 +++++++-------- t/helper/test-ref-store.c | 2 +- t/helper/test-submodule-nested-repo-config.c | 2 +- tree-diff.c | 4 +-- upload-pack.c | 2 +- wt-status.c | 4 +-- xdiff-interface.c | 2 +- 48 files changed, 136 insertions(+), 136 
deletions(-) diff --git a/archive.c b/archive.c index f4331aeb49e7b3..c95e398152498b 100644 --- a/archive.c +++ b/archive.c @@ -312,7 +312,7 @@ int write_archive_entries(struct archiver_args *args, struct object_id fake_oid; int i; - oidcpy(&fake_oid, null_oid()); + oidcpy(&fake_oid, null_oid(the_hash_algo)); if (args->baselen > 0 && args->base[args->baselen - 1] == '/') { size_t len = args->baselen; diff --git a/blame.c b/blame.c index a15ddf933352b0..703dab43e78f8b 100644 --- a/blame.c +++ b/blame.c @@ -255,7 +255,7 @@ static struct commit *fake_working_tree_commit(struct repository *r, switch (st.st_mode & S_IFMT) { case S_IFREG: if (opt->flags.allow_textconv && - textconv_object(r, read_from, mode, null_oid(), 0, &buf_ptr, &buf_len)) + textconv_object(r, read_from, mode, null_oid(the_hash_algo), 0, &buf_ptr, &buf_len)) strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1); else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size) die_errno("cannot open or read '%s'", read_from); diff --git a/branch.c b/branch.c index 91297d55ac9f60..6d01d7d6bdb2e4 100644 --- a/branch.c +++ b/branch.c @@ -633,7 +633,7 @@ void create_branch(struct repository *r, 0, &err); if (!transaction || ref_transaction_update(transaction, ref.buf, - &oid, forcing ? NULL : null_oid(), + &oid, forcing ? NULL : null_oid(the_hash_algo), NULL, NULL, flags, msg, &err) || ref_transaction_commit(transaction, &err)) die("%s", err.buf); diff --git a/builtin/checkout.c b/builtin/checkout.c index 01ea9ff8b28022..2e7486cf6587fe 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -130,8 +130,8 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm int changed) { return run_hooks_l(the_repository, "post-checkout", - oid_to_hex(old_commit ? &old_commit->object.oid : null_oid()), - oid_to_hex(new_commit ? &new_commit->object.oid : null_oid()), + oid_to_hex(old_commit ? &old_commit->object.oid : null_oid(the_hash_algo)), + oid_to_hex(new_commit ? 
&new_commit->object.oid : null_oid(the_hash_algo)), changed ? "1" : "0", NULL); /* "new_commit" can be NULL when checking out from the index before a commit exists. */ @@ -710,7 +710,7 @@ static int reset_tree(struct tree *tree, const struct checkout_opts *o, opts.src_index = the_repository->index; opts.dst_index = the_repository->index; init_checkout_metadata(&opts.meta, info->refname, - info->commit ? &info->commit->object.oid : null_oid(), + info->commit ? &info->commit->object.oid : null_oid(the_hash_algo), NULL); if (parse_tree(tree) < 0) return 128; diff --git a/builtin/clone.c b/builtin/clone.c index f14229abf450c0..36aba65e06c3c3 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -690,7 +690,7 @@ static int checkout(int submodule_progress, int filter_submodules, if (write_locked_index(the_repository->index, &lock_file, COMMIT_LOCK)) die(_("unable to write new index file")); - err |= run_hooks_l(the_repository, "post-checkout", oid_to_hex(null_oid()), + err |= run_hooks_l(the_repository, "post-checkout", oid_to_hex(null_oid(the_hash_algo)), oid_to_hex(&oid), "1", NULL); if (!err && (option_recurse_submodules.nr > 0)) { diff --git a/builtin/describe.c b/builtin/describe.c index e2e73f3d757cab..23df333fd04e56 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -518,7 +518,7 @@ static void describe_blob(struct object_id oid, struct strbuf *dst) { struct rev_info revs; struct strvec args = STRVEC_INIT; - struct process_commit_data pcd = { *null_oid(), oid, dst, &revs}; + struct process_commit_data pcd = { *null_oid(the_hash_algo), oid, dst, &revs}; strvec_pushl(&args, "internal: The first arg is not parsed", "--objects", "--in-commit-order", "--reverse", "HEAD", diff --git a/builtin/diff.c b/builtin/diff.c index a4fffee42c6c8c..fa963808c31848 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -104,7 +104,7 @@ static void builtin_diff_b_f(struct rev_info *revs, stuff_change(&revs->diffopt, blob[0]->mode, canon_mode(st.st_mode), - &blob[0]->item->oid, 
null_oid(), + &blob[0]->item->oid, null_oid(the_hash_algo), 1, 0, blob[0]->path ? blob[0]->path : path, path); @@ -498,7 +498,8 @@ int cmd_diff(int argc, /* If this is a no-index diff, just run it and exit there. */ if (no_index) - exit(diff_no_index(&rev, no_index == DIFF_NO_INDEX_IMPLICIT, + exit(diff_no_index(&rev, the_repository->hash_algo, + no_index == DIFF_NO_INDEX_IMPLICIT, argc, argv)); diff --git a/builtin/fast-export.c b/builtin/fast-export.c index a5c82eef1ded4d..7a5c2925cd2dcb 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -869,7 +869,7 @@ static void handle_tag(const char *name, struct tag *tag) p = rewrite_commit((struct commit *)tagged); if (!p) { printf("reset %s\nfrom %s\n\n", - name, oid_to_hex(null_oid())); + name, oid_to_hex(null_oid(the_hash_algo))); free(buf); return; } @@ -883,7 +883,7 @@ static void handle_tag(const char *name, struct tag *tag) if (tagged->type == OBJ_TAG) { printf("reset %s\nfrom %s\n\n", - name, oid_to_hex(null_oid())); + name, oid_to_hex(null_oid(the_hash_algo))); } skip_prefix(name, "refs/tags/", &name); printf("tag %s\n", name); @@ -1023,7 +1023,7 @@ static void handle_tags_and_duplicates(struct string_list *extras) * it. 
*/ printf("reset %s\nfrom %s\n\n", - name, oid_to_hex(null_oid())); + name, oid_to_hex(null_oid(the_hash_algo))); continue; } @@ -1042,7 +1042,7 @@ static void handle_tags_and_duplicates(struct string_list *extras) if (!reference_excluded_commits) { /* delete the ref */ printf("reset %s\nfrom %s\n\n", - name, oid_to_hex(null_oid())); + name, oid_to_hex(null_oid(the_hash_algo))); continue; } /* set ref to commit using oid, not mark */ @@ -1153,7 +1153,7 @@ static void handle_deletes(void) continue; printf("reset %s\nfrom %s\n\n", - refspec->dst, oid_to_hex(null_oid())); + refspec->dst, oid_to_hex(null_oid(the_hash_algo))); } } diff --git a/builtin/fsck.c b/builtin/fsck.c index 62c7494bbda492..f86ecd8eaa5c46 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -625,7 +625,7 @@ static int fsck_loose(const struct object_id *oid, const char *path, void *data) void *contents = NULL; int eaten; struct object_info oi = OBJECT_INFO_INIT; - struct object_id real_oid = *null_oid(); + struct object_id real_oid = *null_oid(the_hash_algo); int err = 0; strbuf_reset(&cb_data->obj_type); diff --git a/builtin/grep.c b/builtin/grep.c index cbbf6f264390bf..283d64cab80974 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -453,7 +453,7 @@ static int grep_submodule(struct grep_opt *opt, return 0; subrepo = xmalloc(sizeof(*subrepo)); - if (repo_submodule_init(subrepo, superproject, path, null_oid())) { + if (repo_submodule_init(subrepo, superproject, path, null_oid(opt->repo->hash_algo))) { free(subrepo); return 0; } diff --git a/builtin/ls-files.c b/builtin/ls-files.c index a4431429b7de42..7bc418be5d475b 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -234,7 +234,8 @@ static void show_submodule(struct repository *superproject, { struct repository subrepo; - if (repo_submodule_init(&subrepo, superproject, path, null_oid())) + if (repo_submodule_init(&subrepo, superproject, path, + null_oid(superproject->hash_algo))) return; if (repo_read_index(&subrepo) < 0) diff --git 
a/builtin/rebase.c b/builtin/rebase.c index d4715ed35d77ed..1cd802dd92a53d 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -925,7 +925,7 @@ static void fill_branch_base(struct rebase_options *options, options->orig_head, &merge_bases) < 0) exit(128); if (!merge_bases || merge_bases->next) - oidcpy(branch_base, null_oid()); + oidcpy(branch_base, null_oid(the_hash_algo)); else oidcpy(branch_base, &merge_bases->item->object.oid); diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 7b28fc9df6ec0b..b3e2a9d0c6074a 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -363,7 +363,7 @@ static void write_head_info(void) strvec_clear(&excludes_vector); if (!sent_capabilities) - show_ref("capabilities^{}", null_oid()); + show_ref("capabilities^{}", null_oid(the_hash_algo)); advertise_shallow_grafts(1); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index c1a8029714bfe9..570226ea16653a 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -78,7 +78,7 @@ static int get_default_remote_submodule(const char *module_path, char **default_ int ret; if (repo_submodule_init(&subrepo, the_repository, module_path, - null_oid()) < 0) + null_oid(the_hash_algo)) < 0) return die_message(_("could not get a repository handle for submodule '%s'"), module_path); ret = repo_get_default_remote(&subrepo, default_remote); @@ -308,7 +308,7 @@ static void runcommand_in_submodule_cb(const struct cache_entry *list_item, displaypath = get_submodule_displaypath(path, info->prefix, info->super_prefix); - sub = submodule_from_path(the_repository, null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub) die(_("No url found for submodule path '%s' in .gitmodules"), @@ -468,7 +468,7 @@ static void init_submodule(const char *path, const char *prefix, displaypath = get_submodule_displaypath(path, prefix, super_prefix); - sub = submodule_from_path(the_repository, null_oid(), path); 
+ sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub) die(_("No url found for submodule path '%s' in .gitmodules"), @@ -645,14 +645,14 @@ static void status_submodule(const char *path, const struct object_id *ce_oid, if (validate_submodule_path(path) < 0) exit(128); - if (!submodule_from_path(the_repository, null_oid(), path)) + if (!submodule_from_path(the_repository, null_oid(the_hash_algo), path)) die(_("no submodule mapping found in .gitmodules for path '%s'"), path); displaypath = get_submodule_displaypath(path, prefix, super_prefix); if ((CE_STAGEMASK & ce_flags) >> CE_STAGESHIFT) { - print_status(flags, 'U', path, null_oid(), displaypath); + print_status(flags, 'U', path, null_oid(the_hash_algo), displaypath); goto cleanup; } @@ -912,7 +912,7 @@ static void generate_submodule_summary(struct summary_cb *info, struct strbuf errmsg = STRBUF_INIT; int total_commits = -1; - if (!info->cached && oideq(&p->oid_dst, null_oid())) { + if (!info->cached && oideq(&p->oid_dst, null_oid(the_hash_algo))) { if (S_ISGITLINK(p->mod_dst)) { struct ref_store *refs = repo_get_submodule_ref_store(the_repository, p->sm_path); @@ -1051,7 +1051,7 @@ static void prepare_submodule_summary(struct summary_cb *info, if (info->for_status && p->status != 'A' && (sub = submodule_from_path(the_repository, - null_oid(), p->sm_path))) { + null_oid(the_hash_algo), p->sm_path))) { char *config_key = NULL; const char *value; int ignore_all = 0; @@ -1259,7 +1259,7 @@ static void sync_submodule(const char *path, const char *prefix, if (validate_submodule_path(path) < 0) exit(128); - sub = submodule_from_path(the_repository, null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (sub && sub->url) { if (starts_with_dot_dot_slash(sub->url) || @@ -1404,7 +1404,7 @@ static void deinit_submodule(const char *path, const char *prefix, if (validate_submodule_path(path) < 0) exit(128); - sub = submodule_from_path(the_repository, 
null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub || !sub->name) goto cleanup; @@ -1929,7 +1929,7 @@ static int determine_submodule_update_strategy(struct repository *r, enum submodule_update_type update, struct submodule_update_strategy *out) { - const struct submodule *sub = submodule_from_path(r, null_oid(), path); + const struct submodule *sub = submodule_from_path(r, null_oid(the_hash_algo), path); char *key; const char *val; int ret; @@ -2089,7 +2089,7 @@ static int prepare_to_clone_next_submodule(const struct cache_entry *ce, goto cleanup; } - sub = submodule_from_path(the_repository, null_oid(), ce->name); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), ce->name); if (!sub) { next_submodule_warn_missing(suc, out, displaypath); @@ -2485,7 +2485,7 @@ static int remote_submodule_branch(const char *path, const char **branch) char *key; *branch = NULL; - sub = submodule_from_path(the_repository, null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub) return die_message(_("could not initialize submodule at path '%s'"), path); @@ -2531,7 +2531,7 @@ static int ensure_core_worktree(const char *path) const char *cw; struct repository subrepo; - if (repo_submodule_init(&subrepo, the_repository, path, null_oid())) + if (repo_submodule_init(&subrepo, the_repository, path, null_oid(the_hash_algo))) return die_message(_("could not get a repository handle for submodule '%s'"), path); @@ -2644,7 +2644,7 @@ static int update_submodule(struct update_data *update_data) return ret; if (update_data->just_cloned) - oidcpy(&update_data->suboid, null_oid()); + oidcpy(&update_data->suboid, null_oid(the_hash_algo)); else if (repo_resolve_gitlink_ref(the_repository, update_data->sm_path, "HEAD", &update_data->suboid)) return die_message(_("Unable to find current revision in submodule path '%s'"), @@ -2697,8 +2697,8 @@ static int update_submodule(struct 
update_data *update_data) struct update_data next = *update_data; next.prefix = NULL; - oidcpy(&next.oid, null_oid()); - oidcpy(&next.suboid, null_oid()); + oidcpy(&next.oid, null_oid(the_hash_algo)); + oidcpy(&next.suboid, null_oid(the_hash_algo)); cp.dir = update_data->sm_path; cp.git_cmd = 1; @@ -3057,7 +3057,7 @@ static int module_set_url(int argc, const char **argv, const char *prefix, if (argc != 2 || !(path = argv[0]) || !(newurl = argv[1])) usage_with_options(usage, options); - sub = submodule_from_path(the_repository, null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub) die(_("no submodule mapping found in .gitmodules for path '%s'"), @@ -3113,7 +3113,7 @@ static int module_set_branch(int argc, const char **argv, const char *prefix, if (argc != 1 || !(path = argv[0])) usage_with_options(usage, options); - sub = submodule_from_path(the_repository, null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub) die(_("no submodule mapping found in .gitmodules for path '%s'"), diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index e20c120913e5e6..73d431a0dde285 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -505,7 +505,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, * has not been resolved yet. 
*/ oidclr(&obj_list[nr].oid, the_repository->hash_algo); - add_delta_to_list(nr, null_oid(), base_offset, + add_delta_to_list(nr, null_oid(the_hash_algo), base_offset, delta_data, delta_size); return; } diff --git a/builtin/update-ref.c b/builtin/update-ref.c index 4d35bdc4b4b579..3796b9844d1b2c 100644 --- a/builtin/update-ref.c +++ b/builtin/update-ref.c @@ -500,7 +500,7 @@ static void parse_cmd_symref_verify(struct ref_transaction *transaction, */ old_target = parse_next_refname(&next); if (!old_target) - oidcpy(&old_oid, null_oid()); + oidcpy(&old_oid, null_oid(the_hash_algo)); if (*next != line_termination) die("symref-verify %s: extra input: %s", refname, next); diff --git a/builtin/worktree.c b/builtin/worktree.c index 48448a83550944..87ccd47794cbf7 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -578,7 +578,7 @@ static int add_worktree(const char *path, const char *refname, strvec_pushl(&opt.env, "GIT_DIR", "GIT_WORK_TREE", NULL); strvec_pushl(&opt.args, - oid_to_hex(null_oid()), + oid_to_hex(null_oid(the_hash_algo)), oid_to_hex(&commit->object.oid), "1", NULL); diff --git a/combine-diff.c b/combine-diff.c index 9527f3160d8c22..553bf59fed6734 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1066,7 +1066,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent, &result_size, NULL, NULL); } else if (textconv) { struct diff_filespec *df = alloc_filespec(elem->path); - fill_filespec(df, null_oid(), 0, st.st_mode); + fill_filespec(df, null_oid(the_hash_algo), 0, st.st_mode); result_size = fill_textconv(opt->repo, textconv, df, &result); free_filespec(df); } else if (0 <= (fd = open(elem->path, O_RDONLY))) { diff --git a/diff-lib.c b/diff-lib.c index 353b473ed52e41..244468dd1a2b30 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -172,7 +172,7 @@ void run_diff_files(struct rev_info *revs, unsigned int option) * these from (stage - 2). 
*/ dpath = combine_diff_path_new(ce->name, ce_namelen(ce), - wt_mode, null_oid(), 2); + wt_mode, null_oid(the_hash_algo), 2); while (i < entries) { struct cache_entry *nce = istate->cache[i]; @@ -257,7 +257,7 @@ void run_diff_files(struct rev_info *revs, unsigned int option) ce_intent_to_add(ce)) { newmode = ce_mode_from_stat(ce, st.st_mode); diff_addremove(&revs->diffopt, '+', newmode, - null_oid(), 0, ce->name, 0); + null_oid(the_hash_algo), 0, ce->name, 0); continue; } @@ -274,7 +274,7 @@ void run_diff_files(struct rev_info *revs, unsigned int option) } oldmode = ce->ce_mode; old_oid = &ce->oid; - new_oid = changed ? null_oid() : &ce->oid; + new_oid = changed ? null_oid(the_hash_algo) : &ce->oid; diff_change(&revs->diffopt, oldmode, newmode, old_oid, new_oid, !is_null_oid(old_oid), @@ -330,7 +330,7 @@ static int get_stat_data(const struct cache_entry *ce, 0, dirty_submodule); if (changed) { mode = ce_mode_from_stat(ce, st.st_mode); - oid = null_oid(); + oid = null_oid(the_hash_algo); } } @@ -402,7 +402,7 @@ static int show_modified(struct rev_info *revs, p = combine_diff_path_new(new_entry->name, ce_namelen(new_entry), - mode, null_oid(), 2); + mode, null_oid(the_hash_algo), 2); p->parent[0].status = DIFF_STATUS_MODIFIED; p->parent[0].mode = new_entry->ce_mode; oidcpy(&p->parent[0].oid, &new_entry->oid); diff --git a/diff-no-index.c b/diff-no-index.c index 6f277892d3aef6..9739b2b268b93c 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -113,7 +113,8 @@ static void populate_from_stdin(struct diff_filespec *s) populate_common(s, &buf); } -static struct diff_filespec *noindex_filespec(const char *name, int mode, +static struct diff_filespec *noindex_filespec(const struct git_hash_algo *algop, + const char *name, int mode, enum special special) { struct diff_filespec *s; @@ -121,7 +122,7 @@ static struct diff_filespec *noindex_filespec(const char *name, int mode, if (!name) name = "/dev/null"; s = alloc_filespec(name); - fill_filespec(s, null_oid(), 0, mode); + 
fill_filespec(s, null_oid(algop), 0, mode); if (special == SPECIAL_STDIN) populate_from_stdin(s); else if (special == SPECIAL_PIPE) @@ -129,7 +130,7 @@ static struct diff_filespec *noindex_filespec(const char *name, int mode, return s; } -static int queue_diff(struct diff_options *o, +static int queue_diff(struct diff_options *o, const struct git_hash_algo *algop, const char *name1, const char *name2, int recursing) { int mode1 = 0, mode2 = 0; @@ -145,14 +146,14 @@ static int queue_diff(struct diff_options *o, if (S_ISDIR(mode1)) { /* 2 is file that is created */ - d1 = noindex_filespec(NULL, 0, SPECIAL_NONE); - d2 = noindex_filespec(name2, mode2, special2); + d1 = noindex_filespec(algop, NULL, 0, SPECIAL_NONE); + d2 = noindex_filespec(algop, name2, mode2, special2); name2 = NULL; mode2 = 0; } else { /* 1 is file that is deleted */ - d1 = noindex_filespec(name1, mode1, special1); - d2 = noindex_filespec(NULL, 0, SPECIAL_NONE); + d1 = noindex_filespec(algop, name1, mode1, special1); + d2 = noindex_filespec(algop, NULL, 0, SPECIAL_NONE); name1 = NULL; mode1 = 0; } @@ -217,7 +218,7 @@ static int queue_diff(struct diff_options *o, n2 = buffer2.buf; } - ret = queue_diff(o, n1, n2, 1); + ret = queue_diff(o, algop, n1, n2, 1); } string_list_clear(&p1, 0); string_list_clear(&p2, 0); @@ -234,8 +235,8 @@ static int queue_diff(struct diff_options *o, SWAP(special1, special2); } - d1 = noindex_filespec(name1, mode1, special1); - d2 = noindex_filespec(name2, mode2, special2); + d1 = noindex_filespec(algop, name1, mode1, special1); + d2 = noindex_filespec(algop, name2, mode2, special2); diff_queue(&diff_queued_diff, d1, d2); return 0; } @@ -297,9 +298,8 @@ static const char * const diff_no_index_usage[] = { NULL }; -int diff_no_index(struct rev_info *revs, - int implicit_no_index, - int argc, const char **argv) +int diff_no_index(struct rev_info *revs, const struct git_hash_algo *algop, + int implicit_no_index, int argc, const char **argv) { int i, no_index; int ret = 1; @@ 
-354,7 +354,7 @@ int diff_no_index(struct rev_info *revs, setup_diff_pager(&revs->diffopt); revs->diffopt.flags.exit_with_status = 1; - if (queue_diff(&revs->diffopt, paths[0], paths[1], 0)) + if (queue_diff(&revs->diffopt, algop, paths[0], paths[1], 0)) goto out; diff_set_mnemonic_prefix(&revs->diffopt, "1/", "2/"); diffcore_std(&revs->diffopt); diff --git a/diff.c b/diff.c index 4f802b4251c712..4ce5933a4421d3 100644 --- a/diff.c +++ b/diff.c @@ -4346,7 +4346,7 @@ static struct diff_tempfile *prepare_temp_file(struct repository *r, die_errno("readlink(%s)", one->path); prep_temp_blob(r->index, one->path, temp, sb.buf, sb.len, (one->oid_valid ? - &one->oid : null_oid()), + &one->oid : null_oid(the_hash_algo)), (one->oid_valid ? one->mode : S_IFLNK)); strbuf_release(&sb); @@ -4355,7 +4355,7 @@ static struct diff_tempfile *prepare_temp_file(struct repository *r, /* we can borrow from the file in the work tree */ temp->name = one->path; if (!one->oid_valid) - oid_to_hex_r(temp->hex, null_oid()); + oid_to_hex_r(temp->hex, null_oid(the_hash_algo)); else oid_to_hex_r(temp->hex, &one->oid); /* Even though we may sometimes borrow the @@ -6649,8 +6649,8 @@ static void create_filepairs_for_header_only_notifications(struct diff_options * one = alloc_filespec(e->key); two = alloc_filespec(e->key); - fill_filespec(one, null_oid(), 0, 0); - fill_filespec(two, null_oid(), 0, 0); + fill_filespec(one, null_oid(the_hash_algo), 0, 0); + fill_filespec(two, null_oid(the_hash_algo), 0, 0); p = diff_queue(q, one, two); p->status = DIFF_STATUS_MODIFIED; } diff --git a/diff.h b/diff.h index 0a566f5531ac42..0822111c62a4f7 100644 --- a/diff.h +++ b/diff.h @@ -656,7 +656,7 @@ void flush_one_hunk(struct object_id *result, struct git_hash_ctx *ctx); int diff_result_code(struct rev_info *); -int diff_no_index(struct rev_info *, +int diff_no_index(struct rev_info *, const struct git_hash_algo *algop, int implicit_no_index, int, const char **); int index_differs_from(struct repository *r, const 
char *def, diff --git a/dir.c b/dir.c index cbd82be6c91637..28b0e03feb498a 100644 --- a/dir.c +++ b/dir.c @@ -4035,7 +4035,7 @@ static void connect_wt_gitdir_in_nested(const char *sub_worktree, */ i++; - sub = submodule_from_path(&subrepo, null_oid(), ce->name); + sub = submodule_from_path(&subrepo, null_oid(the_hash_algo), ce->name); if (!sub || !is_submodule_active(&subrepo, ce->name)) /* .gitmodules broken or inactive sub */ continue; diff --git a/grep.c b/grep.c index 4e155ee9e66367..9284b5741f5aca 100644 --- a/grep.c +++ b/grep.c @@ -1517,7 +1517,7 @@ static int fill_textconv_grep(struct repository *r, fill_filespec(df, gs->identifier, 1, 0100644); break; case GREP_SOURCE_FILE: - fill_filespec(df, null_oid(), 0, 0100644); + fill_filespec(df, null_oid(r->hash_algo), 0, 0100644); break; default: BUG("attempt to textconv something without a path?"); diff --git a/hash.c b/hash.c index 4d9f98f72c2fed..4a04ecb50e8a0b 100644 --- a/hash.c +++ b/hash.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "hash.h" #include "hex.h" @@ -232,9 +230,9 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { } }; -const struct object_id *null_oid(void) +const struct object_id *null_oid(const struct git_hash_algo *algop) { - return the_hash_algo->null_oid; + return algop->null_oid; } const char *empty_tree_oid_hex(const struct git_hash_algo *algop) diff --git a/hash.h b/hash.h index 55d67ed83f3ed9..a9ef76fc8e70d6 100644 --- a/hash.h +++ b/hash.h @@ -340,7 +340,7 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop); -const struct object_id *null_oid(void); +const struct object_id *null_oid(const struct git_hash_algo *algop); static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop) { diff --git a/log-tree.c b/log-tree.c index 8b184d6776344b..5dd1b63076f20c 100644 --- a/log-tree.c 
+++ b/log-tree.c @@ -499,7 +499,7 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, { struct strbuf headers = STRBUF_INIT; const char *name = oid_to_hex(opt->zero_commit ? - null_oid() : &commit->object.oid); + null_oid(the_hash_algo) : &commit->object.oid); *need_8bit_cte_p = 0; /* unknown */ diff --git a/merge-ort.c b/merge-ort.c index 46e78c3ffa68e6..eb3a834652ab4d 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1817,7 +1817,7 @@ static int merge_submodule(struct merge_options *opt, BUG("submodule deleted on one side; this should be handled outside of merge_submodule()"); if ((sub_not_initialized = repo_submodule_init(&subrepo, - opt->repo, path, null_oid()))) { + opt->repo, path, null_oid(the_hash_algo)))) { path_msg(opt, CONFLICT_SUBMODULE_NOT_INITIALIZED, 0, path, NULL, NULL, NULL, _("Failed to merge submodule %s (not checked out)"), @@ -2199,7 +2199,7 @@ static int handle_content_merge(struct merge_options *opt, two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode)); merge_status = merge_3way(opt, path, - two_way ? null_oid() : &o->oid, + two_way ? null_oid(the_hash_algo) : &o->oid, &a->oid, &b->oid, pathnames, extra_marker_size, &result_buf); @@ -2231,7 +2231,7 @@ static int handle_content_merge(struct merge_options *opt, } else if (S_ISGITLINK(a->mode)) { int two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode)); clean = merge_submodule(opt, pathnames[0], - two_way ? null_oid() : &o->oid, + two_way ? 
null_oid(the_hash_algo) : &o->oid, &a->oid, &b->oid, &result->oid); if (clean < 0) return -1; @@ -2739,7 +2739,7 @@ static void apply_directory_rename_modifications(struct merge_options *opt, assert(!new_ci->match_mask); new_ci->dirmask = 0; new_ci->stages[1].mode = 0; - oidcpy(&new_ci->stages[1].oid, null_oid()); + oidcpy(&new_ci->stages[1].oid, null_oid(the_hash_algo)); /* * Now that we have the file information in new_ci, make sure @@ -2752,7 +2752,7 @@ static void apply_directory_rename_modifications(struct merge_options *opt, continue; /* zero out any entries related to files */ ci->stages[i].mode = 0; - oidcpy(&ci->stages[i].oid, null_oid()); + oidcpy(&ci->stages[i].oid, null_oid(the_hash_algo)); } /* Now we want to focus on new_ci, so reassign ci to it. */ @@ -3122,7 +3122,7 @@ static int process_renames(struct merge_options *opt, if (type_changed) { /* rename vs. typechange */ /* Mark the original as resolved by removal */ - memcpy(&oldinfo->stages[0].oid, null_oid(), + memcpy(&oldinfo->stages[0].oid, null_oid(the_hash_algo), sizeof(oldinfo->stages[0].oid)); oldinfo->stages[0].mode = 0; oldinfo->filemask &= 0x06; @@ -3994,7 +3994,7 @@ static int process_entry(struct merge_options *opt, if (ci->filemask & (1 << i)) continue; ci->stages[i].mode = 0; - oidcpy(&ci->stages[i].oid, null_oid()); + oidcpy(&ci->stages[i].oid, null_oid(the_hash_algo)); } } else if (ci->df_conflict && ci->merged.result.mode != 0) { /* @@ -4041,7 +4041,7 @@ static int process_entry(struct merge_options *opt, continue; /* zero out any entries related to directories */ new_ci->stages[i].mode = 0; - oidcpy(&new_ci->stages[i].oid, null_oid()); + oidcpy(&new_ci->stages[i].oid, null_oid(the_hash_algo)); } /* @@ -4163,11 +4163,11 @@ static int process_entry(struct merge_options *opt, new_ci->merged.result.mode = ci->stages[2].mode; oidcpy(&new_ci->merged.result.oid, &ci->stages[2].oid); new_ci->stages[1].mode = 0; - oidcpy(&new_ci->stages[1].oid, null_oid()); + oidcpy(&new_ci->stages[1].oid, 
null_oid(the_hash_algo)); new_ci->filemask = 5; if ((S_IFMT & b_mode) != (S_IFMT & o_mode)) { new_ci->stages[0].mode = 0; - oidcpy(&new_ci->stages[0].oid, null_oid()); + oidcpy(&new_ci->stages[0].oid, null_oid(the_hash_algo)); new_ci->filemask = 4; } @@ -4175,11 +4175,11 @@ static int process_entry(struct merge_options *opt, ci->merged.result.mode = ci->stages[1].mode; oidcpy(&ci->merged.result.oid, &ci->stages[1].oid); ci->stages[2].mode = 0; - oidcpy(&ci->stages[2].oid, null_oid()); + oidcpy(&ci->stages[2].oid, null_oid(the_hash_algo)); ci->filemask = 3; if ((S_IFMT & a_mode) != (S_IFMT & o_mode)) { ci->stages[0].mode = 0; - oidcpy(&ci->stages[0].oid, null_oid()); + oidcpy(&ci->stages[0].oid, null_oid(the_hash_algo)); ci->filemask = 2; } @@ -4304,7 +4304,7 @@ static int process_entry(struct merge_options *opt, /* Deleted on both sides */ ci->merged.is_null = 1; ci->merged.result.mode = 0; - oidcpy(&ci->merged.result.oid, null_oid()); + oidcpy(&ci->merged.result.oid, null_oid(the_hash_algo)); assert(!ci->df_conflict); ci->merged.clean = !ci->path_conflict; } diff --git a/merge-recursive.c b/merge-recursive.c index 884ccf99a58d3b..9aedffc546b230 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -502,7 +502,7 @@ static int get_tree_entry_if_blob(struct repository *r, ret = get_tree_entry(r, tree, path, &dfs->oid, &dfs->mode); if (S_ISDIR(dfs->mode)) { - oidcpy(&dfs->oid, null_oid()); + oidcpy(&dfs->oid, null_oid(the_hash_algo)); dfs->mode = 0; } return ret; @@ -1238,7 +1238,7 @@ static int merge_submodule(struct merge_options *opt, if (is_null_oid(b)) return 0; - if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) { + if (repo_submodule_init(&subrepo, opt->repo, path, null_oid(the_hash_algo))) { output(opt, 1, _("Failed to merge submodule %s (not checked out)"), path); return 0; } @@ -1698,7 +1698,7 @@ static int handle_file_collision(struct merge_options *opt, /* Store things in diff_filespecs for functions that need it */ null.path = (char 
*)collide_path; - oidcpy(&null.oid, null_oid()); + oidcpy(&null.oid, null_oid(the_hash_algo)); null.mode = 0; if (merge_mode_and_contents(opt, &null, a, b, collide_path, @@ -2897,14 +2897,14 @@ static int process_renames(struct merge_options *opt, dst_other.mode = ren1->dst_entry->stages[other_stage].mode; try_merge = 0; - if (oideq(&src_other.oid, null_oid()) && + if (oideq(&src_other.oid, null_oid(the_hash_algo)) && ren1->dir_rename_original_type == 'A') { setup_rename_conflict_info(RENAME_VIA_DIR, opt, ren1, NULL); } else if (renamed_to_self) { setup_rename_conflict_info(RENAME_NORMAL, opt, ren1, NULL); - } else if (oideq(&src_other.oid, null_oid())) { + } else if (oideq(&src_other.oid, null_oid(the_hash_algo))) { setup_rename_conflict_info(RENAME_DELETE, opt, ren1, NULL); } else if ((dst_other.mode == ren1->pair->two->mode) && @@ -2923,7 +2923,7 @@ static int process_renames(struct merge_options *opt, 1, /* update_cache */ 0 /* update_wd */)) clean_merge = -1; - } else if (!oideq(&dst_other.oid, null_oid())) { + } else if (!oideq(&dst_other.oid, null_oid(the_hash_algo))) { /* * Probably not a clean merge, but it's * premature to set clean_merge to 0 here, diff --git a/notes-merge.c b/notes-merge.c index 67a472020dedd9..5008faef450ca3 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -617,7 +617,7 @@ int notes_merge(struct notes_merge_options *o, if (repo_get_merge_bases(the_repository, local, remote, &bases) < 0) exit(128); if (!bases) { - base_oid = null_oid(); + base_oid = null_oid(the_hash_algo); base_tree_oid = the_hash_algo->empty_tree; if (o->verbosity >= 4) printf("No merge base found; doing history-less merge\n"); diff --git a/notes.c b/notes.c index f5344230505afb..ce5a1006a8332f 100644 --- a/notes.c +++ b/notes.c @@ -1353,7 +1353,7 @@ int copy_note(struct notes_tree *t, if (note) return add_note(t, to_obj, note, combine_notes); else if (existing_note) - return add_note(t, to_obj, null_oid(), combine_notes); + return add_note(t, to_obj, 
null_oid(the_hash_algo), combine_notes); return 0; } diff --git a/object-file.c b/object-file.c index de603e2ca8cdbf..772c311f188a22 100644 --- a/object-file.c +++ b/object-file.c @@ -2405,7 +2405,7 @@ static int index_mem(struct index_state *istate, opts.strict = 1; opts.error_func = hash_format_check_report; - if (fsck_buffer(null_oid(), type, buf, size, &opts)) + if (fsck_buffer(null_oid(the_hash_algo), type, buf, size, &opts)) die(_("refusing to create malformed object")); fsck_finish(&opts); } diff --git a/parse-options-cb.c b/parse-options-cb.c index 166d35e0ebd7f4..50c8afe4121bec 100644 --- a/parse-options-cb.c +++ b/parse-options-cb.c @@ -145,7 +145,7 @@ int parse_opt_object_id(const struct option *opt, const char *arg, int unset) struct object_id *target = opt->value; if (unset) { - oidcpy(target, null_oid()); + oidcpy(target, null_oid(the_hash_algo)); return 0; } if (!arg) diff --git a/range-diff.c b/range-diff.c index 9501c358a84174..8a2dcbee322e72 100644 --- a/range-diff.c +++ b/range-diff.c @@ -467,7 +467,7 @@ static struct diff_filespec *get_filespec(const char *name, const char *p) { struct diff_filespec *spec = alloc_filespec(name); - fill_filespec(spec, null_oid(), 0, 0100644); + fill_filespec(spec, null_oid(the_hash_algo), 0, 0100644); spec->data = (char *)p; spec->size = strlen(p); spec->should_munmap = 0; diff --git a/read-cache.c b/read-cache.c index a012cb22368895..2f9e21c897d1f5 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1735,7 +1735,7 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size) end = (unsigned char *)hdr + size; start = end - the_hash_algo->rawsz; oidread(&oid, start, the_repository->hash_algo); - if (oideq(&oid, null_oid())) + if (oideq(&oid, null_oid(the_hash_algo))) return 0; the_hash_algo->init_fn(&c); diff --git a/refs.c b/refs.c index 118465271d7275..215294115b4bbd 100644 --- a/refs.c +++ b/refs.c @@ -1377,7 +1377,7 @@ int ref_transaction_create(struct ref_transaction *transaction, return 1; } 
return ref_transaction_update(transaction, refname, new_oid, - null_oid(), new_target, NULL, flags, + null_oid(the_hash_algo), new_target, NULL, flags, msg, err); } @@ -1396,7 +1396,7 @@ int ref_transaction_delete(struct ref_transaction *transaction, if (old_target && !(flags & REF_NO_DEREF)) BUG("delete cannot operate on symrefs with deref mode"); return ref_transaction_update(transaction, refname, - null_oid(), old_oid, + null_oid(the_hash_algo), old_oid, NULL, old_target, flags, msg, err); } @@ -2160,7 +2160,7 @@ struct ref_store *repo_get_submodule_ref_store(struct repository *repo, subrepo = xmalloc(sizeof(*subrepo)); if (repo_submodule_init(subrepo, repo, submodule, - null_oid())) { + null_oid(the_hash_algo))) { free(subrepo); goto done; } @@ -2345,14 +2345,14 @@ static int run_transaction_hook(struct ref_transaction *transaction, strbuf_reset(&buf); if (!(update->flags & REF_HAVE_OLD)) - strbuf_addf(&buf, "%s ", oid_to_hex(null_oid())); + strbuf_addf(&buf, "%s ", oid_to_hex(null_oid(the_hash_algo))); else if (update->old_target) strbuf_addf(&buf, "ref:%s ", update->old_target); else strbuf_addf(&buf, "%s ", oid_to_hex(&update->old_oid)); if (!(update->flags & REF_HAVE_NEW)) - strbuf_addf(&buf, "%s ", oid_to_hex(null_oid())); + strbuf_addf(&buf, "%s ", oid_to_hex(null_oid(the_hash_algo))); else if (update->new_target) strbuf_addf(&buf, "ref:%s ", update->new_target); else @@ -2794,7 +2794,7 @@ static int migrate_one_ref(const char *refname, const char *referent UNUSED, con if (ret < 0) goto done; - ret = ref_transaction_update(data->transaction, refname, NULL, null_oid(), + ret = ref_transaction_update(data->transaction, refname, NULL, null_oid(the_hash_algo), symref_target.buf, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF, NULL, data->errbuf); if (ret < 0) diff --git a/refs/debug.c b/refs/debug.c index fbc4df08b43ca7..7c61b775e4e500 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -217,7 +217,7 @@ static int debug_read_raw_ref(struct ref_store *ref_store, 
const char *refname, struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store; int res = 0; - oidcpy(oid, null_oid()); + oidcpy(oid, null_oid(ref_store->repo->hash_algo)); res = drefs->refs->be->read_raw_ref(drefs->refs, refname, oid, referent, type, failure_errno); diff --git a/refs/files-backend.c b/refs/files-backend.c index 6c6e67dc1c48ed..ba31cfdb93f151 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1270,7 +1270,7 @@ static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r) ref_transaction_add_update( transaction, r->name, REF_NO_DEREF | REF_HAVE_NEW | REF_HAVE_OLD | REF_IS_PRUNING, - null_oid(), &r->oid, NULL, NULL, NULL, NULL); + null_oid(the_hash_algo), &r->oid, NULL, NULL, NULL, NULL); if (ref_transaction_commit(transaction, &err)) goto cleanup; diff --git a/reset.c b/reset.c index b22b1be792d031..bb590271811ec2 100644 --- a/reset.c +++ b/reset.c @@ -80,7 +80,7 @@ static int update_refs(const struct reset_head_opts *opts, } if (!ret && run_hook) run_hooks_l(the_repository, "post-checkout", - oid_to_hex(head ? head : null_oid()), + oid_to_hex(head ? head : null_oid(the_hash_algo)), oid_to_hex(oid), "1", NULL); strbuf_release(&msg); return ret; diff --git a/sequencer.c b/sequencer.c index ad0ab75c8d4dd7..c112d2e1c43e26 100644 --- a/sequencer.c +++ b/sequencer.c @@ -265,8 +265,8 @@ static struct update_ref_record *init_update_ref_record(const char *ref) CALLOC_ARRAY(rec, 1); - oidcpy(&rec->before, null_oid()); - oidcpy(&rec->after, null_oid()); + oidcpy(&rec->before, null_oid(the_hash_algo)); + oidcpy(&rec->after, null_oid(the_hash_algo)); /* This may fail, but that's fine, we will keep the null OID. */ refs_read_ref(get_main_ref_store(the_repository), ref, &rec->before); @@ -667,7 +667,7 @@ static int fast_forward_to(struct repository *r, if (!transaction || ref_transaction_update(transaction, "HEAD", to, unborn && !is_rebase_i(opts) ? 
- null_oid() : from, NULL, NULL, + null_oid(the_hash_algo) : from, NULL, NULL, 0, sb.buf, &err) || ref_transaction_commit(transaction, &err)) { ref_transaction_free(transaction); @@ -1301,7 +1301,7 @@ int update_head_with_reflog(const struct commit *old_head, 0, err); if (!transaction || ref_transaction_update(transaction, "HEAD", new_head, - old_head ? &old_head->object.oid : null_oid(), + old_head ? &old_head->object.oid : null_oid(the_hash_algo), NULL, NULL, 0, sb.buf, err) || ref_transaction_commit(transaction, err)) { ret = -1; @@ -4683,7 +4683,7 @@ static void create_autostash_internal(struct repository *r, write_file(path, "%s", oid_to_hex(&oid)); } else { refs_update_ref(get_main_ref_store(r), "", refname, - &oid, null_oid(), 0, UPDATE_REFS_DIE_ON_ERR); + &oid, null_oid(the_hash_algo), 0, UPDATE_REFS_DIE_ON_ERR); } printf(_("Created autostash: %s\n"), buf.buf); diff --git a/submodule-config.c b/submodule-config.c index a25059ed7f8d8f..d82b404b73e4f8 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -831,7 +831,7 @@ static int gitmodules_cb(const char *var, const char *value, parameter.cache = repo->submodule_cache; parameter.treeish_name = NULL; - parameter.gitmodules_oid = null_oid(); + parameter.gitmodules_oid = null_oid(the_hash_algo); parameter.overwrite = 1; return parse_config(var, value, ctx, &parameter); diff --git a/submodule.c b/submodule.c index 0530e8cf24e045..0821507ecaa493 100644 --- a/submodule.c +++ b/submodule.c @@ -124,7 +124,7 @@ int update_path_in_gitmodules(const char *oldpath, const char *newpath) if (is_gitmodules_unmerged(the_repository->index)) die(_("Cannot change unmerged .gitmodules, resolve merge conflicts first")); - submodule = submodule_from_path(the_repository, null_oid(), oldpath); + submodule = submodule_from_path(the_repository, null_oid(the_hash_algo), oldpath); if (!submodule || !submodule->name) { warning(_("Could not find section in .gitmodules where path=%s"), oldpath); return -1; @@ -153,7 +153,7 @@ int
remove_path_from_gitmodules(const char *path) if (is_gitmodules_unmerged(the_repository->index)) die(_("Cannot change unmerged .gitmodules, resolve merge conflicts first")); - submodule = submodule_from_path(the_repository, null_oid(), path); + submodule = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!submodule || !submodule->name) { warning(_("Could not find section in .gitmodules where path=%s"), path); return -1; @@ -204,7 +204,7 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, const char *path) { const struct submodule *submodule = submodule_from_path(the_repository, - null_oid(), + null_oid(the_hash_algo), path); if (submodule) { const char *ignore; @@ -312,7 +312,7 @@ int is_tree_submodule_active(struct repository *repo, int is_submodule_active(struct repository *repo, const char *path) { - return is_tree_submodule_active(repo, null_oid(), path); + return is_tree_submodule_active(repo, null_oid(the_hash_algo), path); } int is_submodule_populated_gently(const char *path, int *return_error_code) @@ -778,7 +778,7 @@ const struct submodule *submodule_from_ce(const struct cache_entry *ce) if (!should_update_submodules()) return NULL; - return submodule_from_path(the_repository, null_oid(), ce->name); + return submodule_from_path(the_repository, null_oid(the_hash_algo), ce->name); } @@ -1062,7 +1062,7 @@ static int submodule_needs_pushing(struct repository *r, const char *path, struct oid_array *commits) { - if (!submodule_has_commits(r, path, null_oid(), commits)) + if (!submodule_has_commits(r, path, null_oid(the_hash_algo), commits)) /* * NOTE: We do consider it safe to return "no" here. 
The * correct answer would be "We do not know" instead of @@ -1126,7 +1126,7 @@ int find_unpushed_submodules(struct repository *r, const struct submodule *submodule; const char *path = NULL; - submodule = submodule_from_name(r, null_oid(), name->string); + submodule = submodule_from_name(r, null_oid(the_hash_algo), name->string); if (submodule) path = submodule->path; else @@ -1351,7 +1351,7 @@ static void calculate_changed_submodule_paths(struct repository *r, const struct submodule *submodule; const char *path = NULL; - submodule = submodule_from_name(r, null_oid(), name->string); + submodule = submodule_from_name(r, null_oid(the_hash_algo), name->string); if (submodule) path = submodule->path; else @@ -1360,7 +1360,7 @@ static void calculate_changed_submodule_paths(struct repository *r, if (!path) continue; - if (submodule_has_commits(r, path, null_oid(), &cs_data->new_commits)) { + if (submodule_has_commits(r, path, null_oid(the_hash_algo), &cs_data->new_commits)) { changed_submodule_data_clear(cs_data); *name->string = '\0'; } @@ -1602,7 +1602,7 @@ get_fetch_task_from_index(struct submodule_parallel_fetch *spf, if (!S_ISGITLINK(ce->ce_mode)) continue; - task = fetch_task_create(spf, ce->name, null_oid()); + task = fetch_task_create(spf, ce->name, null_oid(the_hash_algo)); if (!task) continue; @@ -2166,7 +2166,7 @@ int submodule_move_head(const char *path, const char *super_prefix, if (old_head && !is_submodule_populated_gently(path, error_code_ptr)) return 0; - sub = submodule_from_path(the_repository, null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub) BUG("could not get submodule information for '%s'", path); @@ -2376,7 +2376,7 @@ static void relocate_single_git_dir_into_superproject(const char *path, real_old_git_dir = real_pathdup(old_git_dir, 1); - sub = submodule_from_path(the_repository, null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub) 
die(_("could not lookup name for submodule '%s'"), path); @@ -2462,7 +2462,7 @@ void absorb_git_dir_into_superproject(const char *path, * superproject did not rewrite the git file links yet, * fix it now. */ - sub = submodule_from_path(the_repository, null_oid(), path); + sub = submodule_from_path(the_repository, null_oid(the_hash_algo), path); if (!sub) die(_("could not lookup name for submodule '%s'"), path); submodule_name_to_gitdir(&sub_gitdir, the_repository, sub->name); @@ -2594,7 +2594,7 @@ int submodule_to_gitdir(struct repository *repo, strbuf_addstr(buf, git_dir); } if (!is_git_directory(buf->buf)) { - sub = submodule_from_path(repo, null_oid(), submodule); + sub = submodule_from_path(repo, null_oid(the_hash_algo), submodule); if (!sub) { ret = -1; goto cleanup; diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index e00fce592b10ed..2ff67c067acf67 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -179,7 +179,7 @@ static int cmd_for_each_ref__exclude(struct ref_store *refs, const char **argv) static int cmd_resolve_ref(struct ref_store *refs, const char **argv) { - struct object_id oid = *null_oid(); + struct object_id oid = *null_oid(the_hash_algo); const char *refname = notnull(*argv++, "refname"); int resolve_flags = arg_flags(*argv++, "resolve-flags", empty_flags); int flags; diff --git a/t/helper/test-submodule-nested-repo-config.c b/t/helper/test-submodule-nested-repo-config.c index 6dce957153112a..2710341cd5db11 100644 --- a/t/helper/test-submodule-nested-repo-config.c +++ b/t/helper/test-submodule-nested-repo-config.c @@ -21,7 +21,7 @@ int cmd__submodule_nested_repo_config(int argc, const char **argv) setup_git_directory(); - if (repo_submodule_init(&subrepo, the_repository, argv[1], null_oid())) { + if (repo_submodule_init(&subrepo, the_repository, argv[1], null_oid(the_hash_algo))) { die_usage(argv, "Submodule not found."); } diff --git a/tree-diff.c b/tree-diff.c index 60c558c2b56373..e00fc2f450d116 
100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -181,7 +181,7 @@ static void emit_path(struct combine_diff_path ***tail, strbuf_add(base, path, pathlen); p = combine_diff_path_new(base->buf, base->len, mode, - oid ? oid : null_oid(), + oid ? oid : null_oid(the_hash_algo), nparent); strbuf_setlen(base, old_baselen); @@ -206,7 +206,7 @@ static void emit_path(struct combine_diff_path ***tail, mode_i = tp[i].entry.mode; } else { - oid_i = null_oid(); + oid_i = null_oid(the_hash_algo); mode_i = 0; } diff --git a/upload-pack.c b/upload-pack.c index 67063708b825be..02ce6336028047 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1449,7 +1449,7 @@ void upload_pack(const int advertise_refs, const int stateless_rpc, for_each_namespaced_ref_1(send_ref, &data); if (!data.sent_capabilities) { const char *refname = "capabilities^{}"; - write_v0_ref(&data, refname, refname, null_oid()); + write_v0_ref(&data, refname, refname, null_oid(the_hash_algo)); } /* * fflush stdout before calling advertise_shallow_grafts because send_ref diff --git a/wt-status.c b/wt-status.c index 1da5732f57b115..454601afa15a95 100644 --- a/wt-status.c +++ b/wt-status.c @@ -1824,10 +1824,10 @@ void wt_status_get_state(struct repository *r, if (!sequencer_get_last_command(r, &action)) { if (action == REPLAY_PICK && !state->cherry_pick_in_progress) { state->cherry_pick_in_progress = 1; - oidcpy(&state->cherry_pick_head_oid, null_oid()); + oidcpy(&state->cherry_pick_head_oid, null_oid(the_hash_algo)); } else if (action == REPLAY_REVERT && !state->revert_in_progress) { state->revert_in_progress = 1; - oidcpy(&state->revert_head_oid, null_oid()); + oidcpy(&state->revert_head_oid, null_oid(the_hash_algo)); } } if (get_detached_from) diff --git a/xdiff-interface.c b/xdiff-interface.c index 3bd61f26e9043e..77712811ff1b77 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -181,7 +181,7 @@ void read_mmblob(mmfile_t *ptr, const struct object_id *oid) unsigned long size; enum object_type type; - if (oideq(oid, 
null_oid())) { + if (oideq(oid, null_oid(the_hash_algo))) { ptr->ptr = xstrdup(""); ptr->size = 0; return; From 2bfd3b368572cbf1ce287de09db08b7e7e429ecd Mon Sep 17 00:00:00 2001 From: Arnav Bhate Date: Mon, 10 Mar 2025 23:38:53 +0530 Subject: [PATCH 081/775] decorate: fix sign comparison warnings There are multiple instances where ints have been initialized with values of unsigned ints, and where negative values don't mean anything. When such ints are compared with unsigned ints, it causes sign comparison warnings. Also, some of these are used just as stand-ins for their initial values, never being modified, thus obscuring the specific conditions under which certain operations happen. Replace int with unsigned int for 2 variables, and replace the intermediate variables with their initial values for 2 other variables. Signed-off-by: Arnav Bhate Signed-off-by: Junio C Hamano --- decorate.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/decorate.c b/decorate.c index e161e13772891f..9f24925263ae29 100644 --- a/decorate.c +++ b/decorate.c @@ -3,8 +3,6 @@ * data. 
*/ -#define DISABLE_SIGN_COMPARE_WARNINGS - #include "git-compat-util.h" #include "object.h" #include "decorate.h" @@ -16,9 +14,8 @@ static unsigned int hash_obj(const struct object *obj, unsigned int n) static void *insert_decoration(struct decoration *n, const struct object *base, void *decoration) { - int size = n->size; struct decoration_entry *entries = n->entries; - unsigned int j = hash_obj(base, size); + unsigned int j = hash_obj(base, n->size); while (entries[j].base) { if (entries[j].base == base) { @@ -26,7 +23,7 @@ static void *insert_decoration(struct decoration *n, const struct object *base, entries[j].decoration = decoration; return old; } - if (++j >= size) + if (++j >= n->size) j = 0; } entries[j].base = base; @@ -37,8 +34,8 @@ static void *insert_decoration(struct decoration *n, const struct object *base, static void grow_decoration(struct decoration *n) { - int i; - int old_size = n->size; + unsigned int i; + unsigned int old_size = n->size; struct decoration_entry *old_entries = n->entries; n->size = (old_size + 1000) * 3 / 2; @@ -59,9 +56,7 @@ static void grow_decoration(struct decoration *n) void *add_decoration(struct decoration *n, const struct object *obj, void *decoration) { - int nr = n->nr + 1; - - if (nr > n->size * 2 / 3) + if ((n->nr + 1) > n->size * 2 / 3) grow_decoration(n); return insert_decoration(n, obj, decoration); } From d007dc2a3ef75dd20934cda47162d2e0a7c2fab0 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 10 Mar 2025 16:57:41 +0100 Subject: [PATCH 082/775] git-fast-import.adoc: add missing LF in the BNF Signed-off-by: Luke Shumaker Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index 58a2eaa51a8034..8e0de618c0e4df 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -437,7 +437,7 @@ change 
to the project. original-oid? ('author' (SP <name>)? SP LT <email> GT SP <when> LF)? 'committer' (SP <name>)? SP LT <email> GT SP <when> LF - ('encoding' SP <encoding>)? + ('encoding' SP <encoding> LF)? data ('from' SP <commit-ish> LF)? ('merge' SP <commit-ish> LF)* From 73ca6d20016973669619fe2f30aea38a0aac0f84 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 10 Mar 2025 16:57:42 +0100 Subject: [PATCH 083/775] fast-export: fix missing whitespace after switch "Documentation/CodingGuidelines" says that there should be whitespaces around operators like 'if', 'switch', 'for', etc. Let's fix this in "builtin/fast-export.c". Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index a5c82eef1ded4d..2bf787191a4d10 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -694,7 +694,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, if (anonymize) { reencoded = anonymize_commit_message(); } else if (encoding) { - switch(reencode_mode) { + switch (reencode_mode) { case REENCODE_YES: reencoded = reencode_string(message, "UTF-8", encoding); break; @@ -828,7 +828,7 @@ static void handle_tag(const char *name, struct tag *tag) const char *signature = strstr(message, "\n-----BEGIN PGP SIGNATURE-----\n"); if (signature) - switch(signed_tag_mode) { + switch (signed_tag_mode) { case SIGNED_TAG_ABORT: die("encountered signed tag %s; use " "--signed-tags=<mode> to handle it", oid_to_hex(&tag->object.oid)); @@ -853,7 +853,7 @@ static void handle_tag(const char *name, struct tag *tag) tagged = tag->tagged; tagged_mark = get_object_mark(tagged); if (!tagged_mark) { - switch(tag_of_filtered_mode) { + switch (tag_of_filtered_mode) { case TAG_FILTERING_ABORT: die("tag %s tags unexported object; use " "--tag-of-filtered-object=<mode> to handle it", oid_to_hex(&tag->object.oid)); @@ -965,7 +965,7 @@ static void get_tags_and_duplicates(struct rev_cmdline_info *info) continue; } - switch(commit->object.type) { + switch (commit->object.type) { case OBJ_COMMIT:
break; case OBJ_BLOB: From 3b24d86c56949ca0485bb279e49671b3942ad5a6 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 10 Mar 2025 16:57:43 +0100 Subject: [PATCH 084/775] fast-export: rename --signed-tags='warn' to 'warn-verbatim' The --signed-tags= option takes one of five arguments specifying how to handle signed tags during export. Among these arguments, 'strip' is to 'warn-strip' as 'verbatim' is to 'warn' (the unmentioned argument is 'abort', which stops the fast-export process entirely). That is, signatures are either stripped or copied verbatim while exporting, with or without a warning. Match the pattern and rename 'warn' to 'warn-verbatim' to make it clear that it instructs fast-export to copy signatures verbatim. To maintain backwards compatibility, 'warn' is still recognized as deprecated synonym of 'warn-verbatim'. Signed-off-by: Luke Shumaker Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/git-fast-export.adoc | 6 +++--- builtin/fast-export.c | 8 ++++---- t/t9350-fast-export.sh | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/Documentation/git-fast-export.adoc b/Documentation/git-fast-export.adoc index 752e4b9b01d7d8..ab9a315fa96051 100644 --- a/Documentation/git-fast-export.adoc +++ b/Documentation/git-fast-export.adoc @@ -27,7 +27,7 @@ OPTIONS Insert 'progress' statements every objects, to be shown by 'git fast-import' during import. ---signed-tags=(verbatim|warn|warn-strip|strip|abort):: +--signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort):: Specify how to handle signed tags. Since any transformation after the export can change the tag names (which can also happen when excluding revisions) the signatures will not match. @@ -36,8 +36,8 @@ When asking to 'abort' (which is the default), this program will die when encountering a signed tag. 
With 'strip', the tags will silently be made unsigned, with 'warn-strip' they will be made unsigned but a warning will be displayed, with 'verbatim', they will be silently -exported and with 'warn', they will be exported, but you will see a -warning. +exported and with 'warn-verbatim' (or 'warn', a deprecated synonym), +they will be exported, but you will see a warning. --tag-of-filtered-object=(abort|drop|rewrite):: Specify how to handle tags whose tagged object is filtered out. diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 2bf787191a4d10..2de2adc30eda67 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -36,7 +36,7 @@ static const char *fast_export_usage[] = { }; static int progress; -static enum signed_tag_mode { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT; +static enum signed_tag_mode { SIGNED_TAG_ABORT, VERBATIM, WARN_VERBATIM, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT; static enum tag_of_filtered_mode { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT; static enum reencode_mode { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT; static int fake_missing_tagger; @@ -62,8 +62,8 @@ static int parse_opt_signed_tag_mode(const struct option *opt, *val = SIGNED_TAG_ABORT; else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) *val = VERBATIM; - else if (!strcmp(arg, "warn")) - *val = WARN; + else if (!strcmp(arg, "warn-verbatim") || !strcmp(arg, "warn")) + *val = WARN_VERBATIM; else if (!strcmp(arg, "warn-strip")) *val = WARN_STRIP; else if (!strcmp(arg, "strip")) @@ -833,7 +833,7 @@ static void handle_tag(const char *name, struct tag *tag) die("encountered signed tag %s; use " "--signed-tags= to handle it", oid_to_hex(&tag->object.oid)); - case WARN: + case WARN_VERBATIM: warning("exporting signed tag %s", oid_to_hex(&tag->object.oid)); /* fallthru */ diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh 
index 40427883ec6dbf..cc110727fb1edd 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -253,6 +253,24 @@ test_expect_success 'signed-tags=verbatim' ' ' +test_expect_success 'signed-tags=warn-verbatim' ' + + git fast-export --signed-tags=warn-verbatim sign-your-name >output 2>err && + grep PGP output && + test -s err + +' + +# 'warn' is a backward-compatibility alias for 'warn-verbatim'; test +# that it keeps working. +test_expect_success 'signed-tags=warn' ' + + git fast-export --signed-tags=warn sign-your-name >output 2>err && + grep PGP output && + test -s err + +' + test_expect_success 'signed-tags=strip' ' git fast-export --signed-tags=strip sign-your-name > output && From 87f2a9195e7bea721d9cffe98383b4065d233f66 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 10 Mar 2025 16:57:44 +0100 Subject: [PATCH 085/775] git-fast-export.adoc: clarify why 'verbatim' may not be a good idea Signed-off-by: Luke Shumaker Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/git-fast-export.adoc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/git-fast-export.adoc b/Documentation/git-fast-export.adoc index ab9a315fa96051..2bb52261a0b995 100644 --- a/Documentation/git-fast-export.adoc +++ b/Documentation/git-fast-export.adoc @@ -29,15 +29,20 @@ OPTIONS --signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort):: Specify how to handle signed tags. Since any transformation - after the export can change the tag names (which can also happen - when excluding revisions) the signatures will not match. + after the export (or during the export, such as excluding + revisions) can change the hashes being signed, the signatures + may become invalid. + When asking to 'abort' (which is the default), this program will die when encountering a signed tag. 
With 'strip', the tags will silently be made unsigned, with 'warn-strip' they will be made unsigned but a warning will be displayed, with 'verbatim', they will be silently exported and with 'warn-verbatim' (or 'warn', a deprecated synonym), -they will be exported, but you will see a warning. +they will be exported, but you will see a warning. 'verbatim' and +'warn-verbatim' should only be used if you know that no transformation +affecting tags or any commit in their history will be performed by you +or by fast-export or fast-import, or if you do not care that the +resulting tag will have an invalid signature. --tag-of-filtered-object=(abort|drop|rewrite):: Specify how to handle tags whose tagged object is filtered out. From dda9bff3c55e45ee8dbfb49fd972c4b35fd4ba7e Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 10 Mar 2025 16:57:45 +0100 Subject: [PATCH 086/775] fast-export: do not modify memory from get_commit_buffer fast-export's helper function find_encoding() takes a `const char *`, but modifies that memory despite the `const`. Ultimately, this memory came from get_commit_buffer(), and you're not supposed to modify the memory that you get from get_commit_buffer(). So, get rid of find_encoding() in favor of commit.h:find_commit_header(), which gives back a string length, rather than mutating the memory to insert a '\0' terminator. Because find_commit_header() detects the "\n\n" string that separates the headers and the commit message, move the call to be above the `message = strstr(..., "\n\n")` call. This helps readability, and allows for the value of `encoding` to be used for a better value of "..." so that the same memory doesn't need to be checked twice. Introduce a `commit_buffer_cursor` variable to avoid writing an awkward `encoding ? encoding + encoding_len : committer_end` expression. 
Signed-off-by: Luke Shumaker Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 61 +++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 2de2adc30eda67..39d43c2a292d43 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -510,21 +510,6 @@ static void show_filemodify(struct diff_queue_struct *q, } } -static const char *find_encoding(const char *begin, const char *end) -{ - const char *needle = "\nencoding "; - char *bol, *eol; - - bol = memmem(begin, end ? end - begin : strlen(begin), - needle, strlen(needle)); - if (!bol) - return NULL; - bol += strlen(needle); - eol = strchrnul(bol, '\n'); - *eol = '\0'; - return bol; -} - static char *anonymize_ref_component(void) { static int counter; @@ -630,9 +615,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, struct string_list *paths_of_changed_objects) { int saved_output_format = rev->diffopt.output_format; - const char *commit_buffer; + const char *commit_buffer, *commit_buffer_cursor; const char *author, *author_end, *committer, *committer_end; - const char *encoding, *message; + const char *encoding = NULL; + size_t encoding_len; + const char *message; char *reencoded = NULL; struct commit_list *p; const char *refname; @@ -641,21 +628,35 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, rev->diffopt.output_format = DIFF_FORMAT_CALLBACK; parse_commit_or_die(commit); - commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL); - author = strstr(commit_buffer, "\nauthor "); + commit_buffer_cursor = commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL); + + author = strstr(commit_buffer_cursor, "\nauthor "); if (!author) die("could not find author in commit %s", oid_to_hex(&commit->object.oid)); author++; - author_end = strchrnul(author, '\n'); - committer = strstr(author_end, "\ncommitter 
"); + commit_buffer_cursor = author_end = strchrnul(author, '\n'); + + committer = strstr(commit_buffer_cursor, "\ncommitter "); if (!committer) die("could not find committer in commit %s", oid_to_hex(&commit->object.oid)); committer++; - committer_end = strchrnul(committer, '\n'); - message = strstr(committer_end, "\n\n"); - encoding = find_encoding(committer_end, message); + commit_buffer_cursor = committer_end = strchrnul(committer, '\n'); + + /* + * find_commit_header() gets a `+ 1` because + * commit_buffer_cursor points at the trailing "\n" at the end + * of the previous line, but find_commit_header() wants a + * pointer to the beginning of the next line. + */ + if (*commit_buffer_cursor == '\n') { + encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len); + if (encoding) + commit_buffer_cursor = encoding + encoding_len; + } + + message = strstr(commit_buffer_cursor, "\n\n"); if (message) message += 2; @@ -694,16 +695,20 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, if (anonymize) { reencoded = anonymize_commit_message(); } else if (encoding) { + char *buf; switch (reencode_mode) { case REENCODE_YES: - reencoded = reencode_string(message, "UTF-8", encoding); + buf = xstrfmt("%.*s", (int)encoding_len, encoding); + reencoded = reencode_string(message, "UTF-8", buf); + free(buf); break; case REENCODE_NO: break; case REENCODE_ABORT: - die("Encountered commit-specific encoding %s in commit " + die("Encountered commit-specific encoding %.*s in commit " "%s; use --reencode=[yes|no] to handle it", - encoding, oid_to_hex(&commit->object.oid)); + (int)encoding_len, encoding, + oid_to_hex(&commit->object.oid)); } } if (!commit->parents) @@ -715,7 +720,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, (int)(author_end - author), author, (int)(committer_end - committer), committer); if (!reencoded && encoding) - printf("encoding %s\n", encoding); + printf("encoding %.*s\n", (int)encoding_len, 
encoding); printf("data %u\n%s", (unsigned)(reencoded ? strlen(reencoded) : message From d9cb0e6ff8b369b0410ac8fb11657f5096d74b8e Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 10 Mar 2025 16:57:46 +0100 Subject: [PATCH 087/775] fast-export, fast-import: add support for signed-commits fast-export has a --signed-tags= option that controls how to handle tag signatures. However, there is no equivalent for commit signatures; it just silently strips the signature out of the commit (analogously to --signed-tags=strip). While signatures are generally problematic for fast-export/fast-import (because hashes are likely to change), if they're going to support tag signatures, there's no reason to not also support commit signatures. So, implement a --signed-commits= option that mirrors the --signed-tags= option. On the fast-export side, try to be as much like signed-tags as possible, in both implementation and in user-interface. This will change the default behavior to '--signed-commits=abort' from what is now '--signed-commits=strip'. In order to provide an escape hatch for users of third-party tools that call fast-export and do not yet know of the --signed-commits= option, add an environment variable 'FAST_EXPORT_SIGNED_COMMITS_NOABORT=1' that changes the default to '--signed-commits=warn-strip'. 
Signed-off-by: Luke Shumaker Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/git-fast-export.adoc | 11 +++ Documentation/git-fast-import.adoc | 18 +++++ builtin/fast-export.c | 123 ++++++++++++++++++++++++----- builtin/fast-import.c | 23 ++++++ t/t9350-fast-export.sh | 98 +++++++++++++++++++++++ 5 files changed, 253 insertions(+), 20 deletions(-) diff --git a/Documentation/git-fast-export.adoc b/Documentation/git-fast-export.adoc index 2bb52261a0b995..413a5274969b6c 100644 --- a/Documentation/git-fast-export.adoc +++ b/Documentation/git-fast-export.adoc @@ -44,6 +44,17 @@ affecting tags or any commit in their history will be performed by you or by fast-export or fast-import, or if you do not care that the resulting tag will have an invalid signature. +--signed-commits=(verbatim|warn-verbatim|warn-strip|strip|abort):: + Specify how to handle signed commits. Behaves exactly as + '--signed-tags', but for commits. Default is 'abort'. ++ +Earlier versions of this command that did not have '--signed-commits' +behaved as if '--signed-commits=strip'. As an escape hatch for users +of tools that call 'git fast-export' but do not yet support +'--signed-commits', you may set the environment variable +'FAST_EXPORT_SIGNED_COMMITS_NOABORT=1' in order to change the default +from 'abort' to 'warn-strip'. + --tag-of-filtered-object=(abort|drop|rewrite):: Specify how to handle tags whose tagged object is filtered out. Since revisions and files to export can be limited by path, diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index 8e0de618c0e4df..7b107f5e8e9da5 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -431,12 +431,21 @@ and control the current import process. More detailed discussion Create or update a branch with a new commit, recording one logical change to the project.
+//// +Yes, it's intentional that the 'gpgsig' line doesn't have a trailing +`LF`; the definition of `data` has a byte-count prefix, so it +doesn't need an `LF` to act as a terminator (and `data` also already +includes an optional trailing `LF?` just in case you want to include +one). +//// + .... 'commit' SP <ref> LF mark? original-oid? ('author' (SP <name>)? SP LT <email> GT SP <when> LF)? 'committer' (SP <name>)? SP LT <email> GT SP <when> LF + ('gpgsig' SP <alg> LF data)? ('encoding' SP <encoding> LF)? data ('from' SP <commit-ish> LF)? @@ -505,6 +514,15 @@ that was selected by the --date-format=<fmt> command-line option. See ``Date Formats'' above for the set of supported formats, and their syntax. +`gpgsig` +^^^^^^^^ + +The optional `gpgsig` command is used to include a PGP/GPG signature +that signs the commit data. + +Here <alg> specifies which hashing algorithm is used for this +signature, either `sha1` or `sha256`. + `encoding` ^^^^^^^^^^ The optional `encoding` command indicates the encoding of the commit diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 39d43c2a292d43..126980f724fd69 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -35,8 +35,11 @@ static const char *fast_export_usage[] = { NULL }; +enum sign_mode { SIGN_ABORT, SIGN_VERBATIM, SIGN_STRIP, SIGN_WARN_VERBATIM, SIGN_WARN_STRIP }; + static int progress; -static enum signed_tag_mode { SIGNED_TAG_ABORT, VERBATIM, WARN_VERBATIM, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT; +static enum sign_mode signed_tag_mode = SIGN_ABORT; +static enum sign_mode signed_commit_mode = SIGN_ABORT; static enum tag_of_filtered_mode { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT; static enum reencode_mode { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT; static int fake_missing_tagger; @@ -53,23 +56,24 @@ static int anonymize; static struct hashmap anonymized_seeds; static struct revision_sources revision_sources; -static int parse_opt_signed_tag_mode(const struct option *opt, +static int
parse_opt_sign_mode(const struct option *opt, const char *arg, int unset) { - enum signed_tag_mode *val = opt->value; - - if (unset || !strcmp(arg, "abort")) - *val = SIGNED_TAG_ABORT; + enum sign_mode *val = opt->value; + if (unset) + return 0; + else if (!strcmp(arg, "abort")) + *val = SIGN_ABORT; else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) - *val = VERBATIM; + *val = SIGN_VERBATIM; else if (!strcmp(arg, "warn-verbatim") || !strcmp(arg, "warn")) - *val = WARN_VERBATIM; + *val = SIGN_WARN_VERBATIM; else if (!strcmp(arg, "warn-strip")) - *val = WARN_STRIP; + *val = SIGN_WARN_STRIP; else if (!strcmp(arg, "strip")) - *val = STRIP; + *val = SIGN_STRIP; else - return error("Unknown signed-tags mode: %s", arg); + return error("Unknown %s mode: %s", opt->long_name, arg); return 0; } @@ -611,6 +615,43 @@ static void anonymize_ident_line(const char **beg, const char **end) *end = out->buf + out->len; } +/* + * find_commit_multiline_header is similar to find_commit_header, + * except that it handles multi-line headers, rather than simply + * returning the first line of the header. + * + * The returned string has had the ' ' line continuation markers + * removed, and points to allocated memory that must be free()d (not + * to memory within 'msg'). + * + * If the header is found, then *end is set to point at the '\n' in + * msg that immediately follows the header value. 
+ */ +static const char *find_commit_multiline_header(const char *msg, + const char *key, + const char **end) +{ + struct strbuf val = STRBUF_INIT; + const char *bol, *eol; + size_t len; + + bol = find_commit_header(msg, key, &len); + if (!bol) + return NULL; + eol = bol + len; + strbuf_add(&val, bol, len); + + while (eol[0] == '\n' && eol[1] == ' ') { + bol = eol + 2; + eol = strchrnul(bol, '\n'); + strbuf_addch(&val, '\n'); + strbuf_add(&val, bol, eol - bol); + } + + *end = eol; + return strbuf_detach(&val, NULL); +} + static void handle_commit(struct commit *commit, struct rev_info *rev, struct string_list *paths_of_changed_objects) { @@ -619,6 +660,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, const char *author, *author_end, *committer, *committer_end; const char *encoding = NULL; size_t encoding_len; + const char *signature_alg = NULL, *signature = NULL; const char *message; char *reencoded = NULL; struct commit_list *p; @@ -645,17 +687,25 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, commit_buffer_cursor = committer_end = strchrnul(committer, '\n'); /* - * find_commit_header() gets a `+ 1` because - * commit_buffer_cursor points at the trailing "\n" at the end - * of the previous line, but find_commit_header() wants a + * find_commit_header() and find_commit_multiline_header() get + * a `+ 1` because commit_buffer_cursor points at the trailing + * "\n" at the end of the previous line, but they want a * pointer to the beginning of the next line. 
*/ + if (*commit_buffer_cursor == '\n') { encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len); if (encoding) commit_buffer_cursor = encoding + encoding_len; } + if (*commit_buffer_cursor == '\n') { + if ((signature = find_commit_multiline_header(commit_buffer_cursor + 1, "gpgsig", &commit_buffer_cursor))) + signature_alg = "sha1"; + else if ((signature = find_commit_multiline_header(commit_buffer_cursor + 1, "gpgsig-sha256", &commit_buffer_cursor))) + signature_alg = "sha256"; + } + message = strstr(commit_buffer_cursor, "\n\n"); if (message) message += 2; @@ -719,6 +769,31 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, printf("%.*s\n%.*s\n", (int)(author_end - author), author, (int)(committer_end - committer), committer); + if (signature) { + switch (signed_commit_mode) { + case SIGN_ABORT: + die("encountered signed commit %s; use " + "--signed-commits= to handle it", + oid_to_hex(&commit->object.oid)); + case SIGN_WARN_VERBATIM: + warning("exporting signed commit %s", + oid_to_hex(&commit->object.oid)); + /* fallthru */ + case SIGN_VERBATIM: + printf("gpgsig %s\ndata %u\n%s", + signature_alg, + (unsigned)strlen(signature), + signature); + break; + case SIGN_WARN_STRIP: + warning("stripping signature from commit %s", + oid_to_hex(&commit->object.oid)); + /* fallthru */ + case SIGN_STRIP: + break; + } + free((char *)signature); + } if (!reencoded && encoding) printf("encoding %.*s\n", (int)encoding_len, encoding); printf("data %u\n%s", @@ -834,21 +909,21 @@ static void handle_tag(const char *name, struct tag *tag) "\n-----BEGIN PGP SIGNATURE-----\n"); if (signature) switch (signed_tag_mode) { - case SIGNED_TAG_ABORT: + case SIGN_ABORT: die("encountered signed tag %s; use " "--signed-tags= to handle it", oid_to_hex(&tag->object.oid)); - case WARN_VERBATIM: + case SIGN_WARN_VERBATIM: warning("exporting signed tag %s", oid_to_hex(&tag->object.oid)); /* fallthru */ - case VERBATIM: + case SIGN_VERBATIM: break; - 
case WARN_STRIP: + case SIGN_WARN_STRIP: warning("stripping signature from tag %s", oid_to_hex(&tag->object.oid)); /* fallthru */ - case STRIP: + case SIGN_STRIP: message_size = signature + 1 - message; break; } @@ -1194,6 +1269,7 @@ int cmd_fast_export(int argc, const char *prefix, struct repository *repo UNUSED) { + const char *env_signed_commits_noabort; struct rev_info revs; struct commit *commit; char *export_filename = NULL, @@ -1207,7 +1283,10 @@ int cmd_fast_export(int argc, N_("show progress after objects")), OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"), N_("select handling of signed tags"), - parse_opt_signed_tag_mode), + parse_opt_sign_mode), + OPT_CALLBACK(0, "signed-commits", &signed_commit_mode, N_("mode"), + N_("select handling of signed commits"), + parse_opt_sign_mode), OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"), N_("select handling of tags that tag filtered objects"), parse_opt_tag_of_filtered_mode), @@ -1248,6 +1327,10 @@ int cmd_fast_export(int argc, if (argc == 1) usage_with_options (fast_export_usage, options); + env_signed_commits_noabort = getenv("FAST_EXPORT_SIGNED_COMMITS_NOABORT"); + if (env_signed_commits_noabort && *env_signed_commits_noabort) + signed_commit_mode = SIGN_WARN_STRIP; + /* we handle encodings */ git_config(git_default_config, NULL); diff --git a/builtin/fast-import.c b/builtin/fast-import.c index d6a368a5665203..a5b33eb91ea228 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2719,10 +2719,13 @@ static struct hash_list *parse_merge(unsigned int *count) static void parse_new_commit(const char *arg) { + static struct strbuf sig = STRBUF_INIT; static struct strbuf msg = STRBUF_INIT; + struct string_list siglines = STRING_LIST_INIT_NODUP; struct branch *b; char *author = NULL; char *committer = NULL; + char *sig_alg = NULL; char *encoding = NULL; struct hash_list *merge_list = NULL; unsigned int merge_count; @@ -2746,6 +2749,13 @@ static void 
parse_new_commit(const char *arg) } if (!committer) die("Expected committer but didn't get one"); + if (skip_prefix(command_buf.buf, "gpgsig ", &v)) { + sig_alg = xstrdup(v); + read_next_command(); + parse_data(&sig, 0, NULL); + read_next_command(); + } else + strbuf_setlen(&sig, 0); if (skip_prefix(command_buf.buf, "encoding ", &v)) { encoding = xstrdup(v); read_next_command(); @@ -2819,10 +2829,23 @@ static void parse_new_commit(const char *arg) strbuf_addf(&new_data, "encoding %s\n", encoding); + if (sig_alg) { + if (!strcmp(sig_alg, "sha1")) + strbuf_addstr(&new_data, "gpgsig "); + else if (!strcmp(sig_alg, "sha256")) + strbuf_addstr(&new_data, "gpgsig-sha256 "); + else + die("Expected gpgsig algorithm sha1 or sha256, got %s", sig_alg); + string_list_split_in_place(&siglines, sig.buf, "\n", -1); + strbuf_add_separated_string_list(&new_data, "\n ", &siglines); + strbuf_addch(&new_data, '\n'); + } strbuf_addch(&new_data, '\n'); strbuf_addbuf(&new_data, &msg); + string_list_clear(&siglines, 1); free(author); free(committer); + free(sig_alg); free(encoding); if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark)) diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh index cc110727fb1edd..304bac5b1dfb0d 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -8,6 +8,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +. "$TEST_DIRECTORY/lib-gpg.sh" test_expect_success 'setup' ' @@ -284,10 +285,107 @@ test_expect_success 'signed-tags=warn-strip' ' test -s err ' +test_expect_success GPG 'set up signed commit' ' + + # Generate a commit with both "gpgsig" and "encoding" set, so + # that we can test that fast-import gets the ordering correct + # between the two. 
+ test_config i18n.commitEncoding ISO-8859-1 && + git checkout -f -b commit-signing main && + echo Sign your name >file-sign && + git add file-sign && + git commit -S -m "signed commit" && + COMMIT_SIGNING=$(git rev-parse --verify commit-signing) + +' + +test_expect_success GPG 'signed-commits default' ' + + sane_unset FAST_EXPORT_SIGNED_COMMITS_NOABORT && + test_must_fail git fast-export --reencode=no commit-signing && + + FAST_EXPORT_SIGNED_COMMITS_NOABORT=1 git fast-export --reencode=no commit-signing >output 2>err && + ! grep ^gpgsig output && + grep "^encoding ISO-8859-1" output && + test -s err && + sed "s/commit-signing/commit-strip-signing/" output | ( + cd new && + git fast-import && + STRIPPED=$(git rev-parse --verify refs/heads/commit-strip-signing) && + test $COMMIT_SIGNING != $STRIPPED + ) + +' + +test_expect_success GPG 'signed-commits=abort' ' + + test_must_fail git fast-export --signed-commits=abort commit-signing + +' + +test_expect_success GPG 'signed-commits=verbatim' ' + + git fast-export --signed-commits=verbatim --reencode=no commit-signing >output && + grep "^gpgsig sha" output && + grep "encoding ISO-8859-1" output && + ( + cd new && + git fast-import && + STRIPPED=$(git rev-parse --verify refs/heads/commit-signing) && + test $COMMIT_SIGNING = $STRIPPED + ) output 2>err && + grep "^gpgsig sha" output && + grep "encoding ISO-8859-1" output && + test -s err && + ( + cd new && + git fast-import && + STRIPPED=$(git rev-parse --verify refs/heads/commit-signing) && + test $COMMIT_SIGNING = $STRIPPED + ) output && + ! grep ^gpgsig output && + grep "^encoding ISO-8859-1" output && + sed "s/commit-signing/commit-strip-signing/" output | ( + cd new && + git fast-import && + STRIPPED=$(git rev-parse --verify refs/heads/commit-strip-signing) && + test $COMMIT_SIGNING != $STRIPPED + ) + +' + +test_expect_success GPG 'signed-commits=warn-strip' ' + + git fast-export --signed-commits=warn-strip --reencode=no commit-signing >output 2>err && + ! 
grep ^gpgsig output && + grep "^encoding ISO-8859-1" output && + test -s err && + sed "s/commit-signing/commit-strip-signing/" output | ( + cd new && + git fast-import && + STRIPPED=$(git rev-parse --verify refs/heads/commit-strip-signing) && + test $COMMIT_SIGNING != $STRIPPED + ) + +' + test_expect_success 'setup submodule' ' test_config_global protocol.file.allow always && git checkout -f main && + test_might_fail git update-ref -d refs/heads/commit-signing && mkdir sub && ( cd sub && From ab362fc6f4cf795a8e3e868cb6fc3ae0102c8d42 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 11 Mar 2025 14:25:00 -0700 Subject: [PATCH 088/775] t: document test_lazy_prereq The t/README file talked about test_set_prereq but lacked explanation on test_lazy_prereq, which is a more modern way to define prerequisites. Signed-off-by: Junio C Hamano --- t/README | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/t/README b/t/README index 53e5b4a7107416..3ce9f5a3939625 100644 --- a/t/README +++ b/t/README @@ -818,7 +818,7 @@ Skipping tests -------------- If you need to skip tests you should do so by using the three-arg form -of the test_* functions (see the "Test harness library" section +of the test_expect_* functions (see the "Test harness library" section below), e.g.: test_expect_success PERL 'I need Perl' ' @@ -965,6 +965,27 @@ see test-lib-functions.sh for the full list and their options. test_done fi + - test_lazy_prereq