Skip to content

Fix REF_DELTA chain bug in 'git index-pack' #745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o
TEST_BUILTINS_OBJS += test-mktemp.o
TEST_BUILTINS_OBJS += test-name-hash.o
TEST_BUILTINS_OBJS += test-online-cpus.o
TEST_BUILTINS_OBJS += test-pack-deltas.o
TEST_BUILTINS_OBJS += test-pack-mtimes.o
TEST_BUILTINS_OBJS += test-parse-options.o
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
Expand Down
58 changes: 32 additions & 26 deletions builtin/index-pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -1107,8 +1107,8 @@ static void *threaded_second_pass(void *data)
set_thread_data(data);
for (;;) {
struct base_data *parent = NULL;
struct object_entry *child_obj;
struct base_data *child;
struct object_entry *child_obj = NULL;
struct base_data *child = NULL;

counter_lock();
display_progress(progress, nr_resolved_deltas);
Expand All @@ -1135,15 +1135,18 @@ static void *threaded_second_pass(void *data)
parent = list_first_entry(&work_head, struct base_data,
list);

if (parent->ref_first <= parent->ref_last) {
while (parent->ref_first <= parent->ref_last) {
int offset = ref_deltas[parent->ref_first++].obj_no;
child_obj = objects + offset;
if (child_obj->real_type != OBJ_REF_DELTA)
die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)",
(uintmax_t) child_obj->idx.offset,
oid_to_hex(&parent->obj->idx.oid));
if (child_obj->real_type != OBJ_REF_DELTA) {
child_obj = NULL;
continue;
}
child_obj->real_type = parent->obj->real_type;
} else {
break;
}

if (!child_obj && parent->ofs_first <= parent->ofs_last) {
child_obj = objects +
ofs_deltas[parent->ofs_first++].obj_no;
assert(child_obj->real_type == OBJ_OFS_DELTA);
Expand Down Expand Up @@ -1176,29 +1179,32 @@ static void *threaded_second_pass(void *data)
}
work_unlock();

if (parent) {
child = resolve_delta(child_obj, parent);
if (!child->children_remaining)
FREE_AND_NULL(child->data);
} else {
child = make_base(child_obj, NULL);
if (child->children_remaining) {
/*
* Since this child has its own delta children,
* we will need this data in the future.
* Inflate now so that future iterations will
* have access to this object's data while
* outside the work mutex.
*/
child->data = get_data_from_pack(child_obj);
child->size = child_obj->size;
if (child_obj) {
if (parent) {
child = resolve_delta(child_obj, parent);
if (!child->children_remaining)
FREE_AND_NULL(child->data);
} else{
child = make_base(child_obj, NULL);
if (child->children_remaining) {
/*
* Since this child has its own delta children,
* we will need this data in the future.
* Inflate now so that future iterations will
* have access to this object's data while
* outside the work mutex.
*/
child->data = get_data_from_pack(child_obj);
child->size = child_obj->size;
}
}
}

work_lock();
if (parent)
parent->retain_data--;
if (child->data) {

if (child && child->data) {
/*
* This child has its own children, so add it to
* work_head.
Expand All @@ -1207,7 +1213,7 @@ static void *threaded_second_pass(void *data)
base_cache_used += child->size;
prune_base_data(NULL);
free_base_data(child);
} else {
} else if (child) {
/*
* This child does not have its own children. It may be
* the last descendant of its ancestors; free those
Expand Down
35 changes: 24 additions & 11 deletions ci/install-dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ begin_group "Install dependencies"

P4WHENCE=https://cdist2.perforce.com/perforce/r23.2
LFSWHENCE=https://github.com/github/git-lfs/releases/download/v$LINUX_GIT_LFS_VERSION
JGITWHENCE=https://repo.eclipse.org/content/groups/releases//org/eclipse/jgit/org.eclipse.jgit.pgm/6.8.0.202311291450-r/org.eclipse.jgit.pgm-6.8.0.202311291450-r.sh
JGITWHENCE=https://repo1.maven.org/maven2/org/eclipse/jgit/org.eclipse.jgit.pgm/6.8.0.202311291450-r/org.eclipse.jgit.pgm-6.8.0.202311291450-r.sh

# Make sudo a no-op and execute the command directly when running as root.
# While using sudo would be fine on most platforms when we are root already,
Expand All @@ -31,7 +31,7 @@ alpine-*)
;;
fedora-*|almalinux-*)
dnf -yq update >/dev/null &&
dnf -yq install shadow-utils sudo make gcc findutils diffutils perl python3 gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel >/dev/null
dnf -yq install shadow-utils sudo make gcc findutils diffutils perl python3 gawk gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel >/dev/null
;;
ubuntu-*|i386/ubuntu-*|debian-*)
# Required so that apt doesn't wait for user input on certain packages.
Expand Down Expand Up @@ -66,16 +66,29 @@ ubuntu-*|i386/ubuntu-*|debian-*)
mkdir --parents "$CUSTOM_PATH"

wget --quiet --directory-prefix="$CUSTOM_PATH" \
"$P4WHENCE/bin.linux26x86_64/p4d" "$P4WHENCE/bin.linux26x86_64/p4"
chmod a+x "$CUSTOM_PATH/p4d" "$CUSTOM_PATH/p4"

wget --quiet "$LFSWHENCE/git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz"
"$P4WHENCE/bin.linux26x86_64/p4d" \
"$P4WHENCE/bin.linux26x86_64/p4" &&
chmod a+x "$CUSTOM_PATH/p4d" "$CUSTOM_PATH/p4" || {
rm -f "$CUSTOM_PATH/p4"
rm -f "$CUSTOM_PATH/p4d"
echo >&2 "P4 download (optional) failed"
}

wget --quiet \
"$LFSWHENCE/git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" &&
tar -xzf "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" \
-C "$CUSTOM_PATH" --strip-components=1 "git-lfs-$LINUX_GIT_LFS_VERSION/git-lfs"
rm "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz"

wget --quiet "$JGITWHENCE" --output-document="$CUSTOM_PATH/jgit"
chmod a+x "$CUSTOM_PATH/jgit"
-C "$CUSTOM_PATH" --strip-components=1 \
"git-lfs-$LINUX_GIT_LFS_VERSION/git-lfs" &&
rm "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" || {
rm -f "$CUSTOM_PATH/git-lfs"
echo >&2 "LFS download (optional) failed"
}

wget --quiet "$JGITWHENCE" --output-document="$CUSTOM_PATH/jgit" &&
chmod a+x "$CUSTOM_PATH/jgit" || {
rm -f "$CUSTOM_PATH/jgit"
echo >&2 "JGit download (optional) failed"
}
;;
esac
;;
Expand Down
1 change: 1 addition & 0 deletions t/helper/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ test_tool_sources = [
'test-mktemp.c',
'test-name-hash.c',
'test-online-cpus.c',
'test-pack-deltas.c',
'test-pack-mtimes.c',
'test-parse-options.c',
'test-parse-pathspec-file.c',
Expand Down
143 changes: 143 additions & 0 deletions t/helper/test-pack-deltas.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#define USE_THE_REPOSITORY_VARIABLE

#include "test-tool.h"
#include "git-compat-util.h"
#include "delta.h"
#include "git-zlib.h"
#include "hash.h"
#include "hex.h"
#include "pack.h"
#include "pack-objects.h"
#include "parse.h"
#include "setup.h"
#include "strbuf.h"
#include "string-list.h"

static const char usage_str[] = "test-tool pack-deltas <nr_entries>";

static unsigned long do_compress(void **pptr, unsigned long size)
{
git_zstream stream;
void *in, *out;
unsigned long maxsize;

git_deflate_init(&stream, 1);
maxsize = git_deflate_bound(&stream, size);

in = *pptr;
out = xmalloc(maxsize);
*pptr = out;

stream.next_in = in;
stream.avail_in = size;
stream.next_out = out;
stream.avail_out = maxsize;
while (git_deflate(&stream, Z_FINISH) == Z_OK)
; /* nothing */
git_deflate_end(&stream);

free(in);
return stream.total_out;
}

static void write_ref_delta(struct hashfile *f,
struct object_id *oid,
struct object_id *base)
{
unsigned char header[MAX_PACK_OBJECT_HEADER];
unsigned long size, base_size, delta_size, compressed_size, hdrlen;
enum object_type type;
void *base_buf, *delta_buf;
void *buf = repo_read_object_file(the_repository,
oid, &type,
&size);

if (!buf)
die("unable to read %s", oid_to_hex(oid));

base_buf = repo_read_object_file(the_repository,
base, &type,
&base_size);

if (!base_buf)
die("unable to read %s", oid_to_hex(base));

delta_buf = diff_delta(base_buf, base_size,
buf, size, &delta_size, 0);

compressed_size = do_compress(&delta_buf, delta_size);

hdrlen = encode_in_pack_object_header(header, sizeof(header),
OBJ_REF_DELTA, delta_size);
hashwrite(f, header, hdrlen);
hashwrite(f, base->hash, the_repository->hash_algo->rawsz);
hashwrite(f, delta_buf, compressed_size);

free(buf);
free(base_buf);
free(delta_buf);
}

int cmd__pack_deltas(int argc, const char **argv)
{
unsigned long n;
struct hashfile *f;
struct strbuf line = STRBUF_INIT;

if (argc != 2) {
usage(usage_str);
return -1;
}

if (!git_parse_ulong(argv[1], &n) || n != (uint32_t)n)
die("invalid number of objects: %s", argv[1]);

setup_git_directory();

f = hashfd(1, "<stdout>");
write_pack_header(f, n);

/* Read each line from stdin into 'line' */
while (strbuf_getline_lf(&line, stdin) != EOF) {
const char *type_str, *content_oid_str, *base_oid_str = NULL;
struct object_id content_oid, base_oid;
struct string_list items = STRING_LIST_INIT_NODUP;
/*
* Tokenize into two or three parts:
* 1. REF_DELTA, OFS_DELTA, or FULL.
* 2. The object ID for the content object.
* 3. The object ID for the base object (optional).
*/
if (string_list_split_in_place(&items, line.buf, " ", 3) < 0)
die("invalid input format: %s", line.buf);

if (items.nr < 2)
die("invalid input format: %s", line.buf);

type_str = items.items[0].string;
content_oid_str = items.items[1].string;

if (get_oid_hex(content_oid_str, &content_oid))
die("invalid object: %s", content_oid_str);
if (items.nr >= 3) {
base_oid_str = items.items[2].string;
if (get_oid_hex(base_oid_str, &base_oid))
die("invalid object: %s", base_oid_str);
}
string_list_clear(&items, 0);

if (!strcmp(type_str, "REF_DELTA"))
write_ref_delta(f, &content_oid, &base_oid);
else if (!strcmp(type_str, "OFS_DELTA"))
die("OFS_DELTA not implemented");
else if (!strcmp(type_str, "FULL"))
die("FULL not implemented");
else
die("unknown pack type: %s", type_str);
}

finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK,
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
strbuf_release(&line);
return 0;
}
1 change: 1 addition & 0 deletions t/helper/test-tool.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ static struct test_cmd cmds[] = {
{ "mktemp", cmd__mktemp },
{ "name-hash", cmd__name_hash },
{ "online-cpus", cmd__online_cpus },
{ "pack-deltas", cmd__pack_deltas },
{ "pack-mtimes", cmd__pack_mtimes },
{ "parse-options", cmd__parse_options },
{ "parse-options-flags", cmd__parse_options_flags },
Expand Down
1 change: 1 addition & 0 deletions t/helper/test-tool.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ int cmd__mergesort(int argc, const char **argv);
int cmd__mktemp(int argc, const char **argv);
int cmd__name_hash(int argc, const char **argv);
int cmd__online_cpus(int argc, const char **argv);
int cmd__pack_deltas(int argc, const char **argv);
int cmd__pack_mtimes(int argc, const char **argv);
int cmd__parse_options(int argc, const char **argv);
int cmd__parse_options_flags(int argc, const char **argv);
Expand Down
36 changes: 34 additions & 2 deletions t/t5309-pack-delta-cycles.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ test_expect_success 'index-pack detects REF_DELTA cycles' '
test_expect_success 'failover to an object in another pack' '
clear_packs &&
git index-pack --stdin <ab.pack &&
test_must_fail git index-pack --stdin --fix-thin <cycle.pack

# This cycle does not fail since the existence of A & B in
# the repo allows us to resolve the cycle.
git index-pack --stdin --fix-thin <cycle.pack
'

test_expect_success 'failover to a duplicate object in the same pack' '
Expand All @@ -72,7 +75,36 @@ test_expect_success 'failover to a duplicate object in the same pack' '
pack_obj $A
} >recoverable.pack &&
pack_trailer recoverable.pack &&
test_must_fail git index-pack --fix-thin --stdin <recoverable.pack

# This cycle does not fail since the existence of a full copy
# of A in the pack allows us to resolve the cycle.
git index-pack --fix-thin --stdin <recoverable.pack
'

test_expect_success 'index-pack works with thin pack A->B->C with B on disk' '
git init server &&
(
cd server &&
test_commit_bulk 4
) &&

A=$(git -C server rev-parse HEAD^{tree}) &&
B=$(git -C server rev-parse HEAD~1^{tree}) &&
C=$(git -C server rev-parse HEAD~2^{tree}) &&
git -C server reset --hard HEAD~1 &&

cat >in <<-EOF &&
REF_DELTA $A $B
REF_DELTA $B $C
EOF

test-tool -C server pack-deltas 2 <in >thin.pack &&

git clone "file://$(pwd)/server" client &&
(
cd client &&
git index-pack --fix-thin --stdin <../thin.pack
)
'

test_done
Loading