From 835ad443d2f082ccd4da07da299e3080e1b4ed80 Mon Sep 17 00:00:00 2001 From: Tyrie Vella Date: Thu, 12 Jun 2025 16:34:16 -0700 Subject: [PATCH 1/4] Always index locally for prefetch packs The GVFS protocol includes an index file along with pack file in the prefetch workflow (primarily used on a new clone to fetch all commits and trees). Currently, GVFS blindly accepts that index file. This pull request changes GVFS prefetch to discard the index sent by the server and always create an index locally. This provides improved security and verification of the pack file, at the expense of performance for the first clone of a repository on a new drive. --- GVFS/GVFS.Common/Git/GitObjects.cs | 79 +++++++++++++----------------- 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/GVFS/GVFS.Common/Git/GitObjects.cs b/GVFS/GVFS.Common/Git/GitObjects.cs index c2fe88b805..b0b30faad9 100644 --- a/GVFS/GVFS.Common/Git/GitObjects.cs +++ b/GVFS/GVFS.Common/Git/GitObjects.cs @@ -706,64 +706,53 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha) bytesDownloaded += packLength; - // We will try to build an index if the server does not send one - if (pack.IndexStream == null) + // We can't trust the index file from the server, so we will always build our own. + // We still need to consume and handle any exceptions from the index stream though. + var canContinue = true; + GitProcess.Result result; + if (this.TryBuildIndex(activity, packTempPath, out result, gitProcess)) { - GitProcess.Result result; - if (!this.TryBuildIndex(activity, packTempPath, out result, gitProcess)) + tempPacks.Add(new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null)); + if (pack.IndexStream != null) { - if (packFlushTask != null) + try { - packFlushTask.Wait(); + bytesDownloaded += pack.IndexStream.Length; + if (pack.IndexStream.CanSeek) + { + pack.IndexStream.Seek(0, SeekOrigin.End); + } + else + { + pack.IndexStream.CopyTo(Stream.Null); + } + } + catch (Exception e) + { + canContinue = false; + EventMetadata metadata = CreateEventMetadata(e); + activity.RelatedWarning(metadata, "Failed to read to end of index stream"); } - - // Move whatever has been successfully downloaded so far - Exception moveException; - this.TryFlushAndMoveTempPacks(tempPacks, ref latestTimestamp, out moveException); - - return new RetryWrapper.CallbackResult(null, true); } - - tempPacks.Add(new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null)); } else { - Task indexFlushTask; - if (this.TryWriteTempFile(activity, pack.IndexStream, idxTempPath, out indexLength, out indexFlushTask)) + canContinue = false; + } + + if (!canContinue) + { + if (packFlushTask != null) { - tempPacks.Add(new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, indexFlushTask)); + packFlushTask.Wait(); } - else - { - bytesDownloaded += indexLength; - - // Try to build the index manually, then retry the prefetch - GitProcess.Result result; - if (this.TryBuildIndex(activity, packTempPath, out result, gitProcess)) - { - // If we were able to recreate the failed index - // we can start the prefetch at the next timestamp - tempPacks.Add(new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null)); - } - else - { - if (packFlushTask != null) - { - packFlushTask.Wait(); - } - } - // Move whatever has been successfully downloaded so far - Exception moveException; - this.TryFlushAndMoveTempPacks(tempPacks, ref latestTimestamp, out moveException); + // Move whatever has been successfully downloaded so far + Exception moveException; + this.TryFlushAndMoveTempPacks(tempPacks, ref latestTimestamp, out moveException); - // The download stream will not be in a good state if the index download fails. - // So we have to restart the prefetch - return new RetryWrapper.CallbackResult(null, true); - } + return new RetryWrapper.CallbackResult(null, true); } - - bytesDownloaded += indexLength; } Exception exception = null; From 4919f248076751c427edd29dbe64d79bc86c7b47 Mon Sep 17 00:00:00 2001 From: Tyrie Vella Date: Mon, 16 Jun 2025 10:42:59 -0700 Subject: [PATCH 2/4] Index prefetch packs in parallel --- GVFS/GVFS.Common/Git/GitObjects.cs | 114 ++++++++++++++++++----------- GVFS/GVFS.Common/Git/GitProcess.cs | 6 +- 2 files changed, 77 insertions(+), 43 deletions(-) diff --git a/GVFS/GVFS.Common/Git/GitObjects.cs b/GVFS/GVFS.Common/Git/GitObjects.cs index b0b30faad9..3a4f4a1848 100644 --- a/GVFS/GVFS.Common/Git/GitObjects.cs +++ b/GVFS/GVFS.Common/Git/GitObjects.cs @@ -676,11 +676,16 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha) string tempPackFolderPath = Path.Combine(this.Enlistment.GitPackRoot, TempPackFolder); this.fileSystem.CreateDirectory(tempPackFolderPath); - List tempPacks = new List(); - foreach (PrefetchPacksDeserializer.PackAndIndex pack in deserializer.EnumeratePacks()) + var tempPacksTasks = new List>(); + // Future: We could manage cancellation of index building tasks if one fails (to stop indexing of later + // files if an early one fails), but in practice the first pack file takes the majority of the time and + // all the others will finish long before it so there would be no benefit to doing so. + bool allSucceeded = true; + + // Read each pack from the stream to a temp file, and start a task to index it. + foreach (PrefetchPacksDeserializer.PackAndIndex packHandle in deserializer.EnumeratePacks()) { - // The advertised size may not match the actual, on-disk size. - long indexLength = 0; + var pack = packHandle; // Capture packHandle in a new variable to avoid closure issues with async index task long packLength; // Write the temp and index to a temp folder to avoid putting corrupt files in the pack folder @@ -701,64 +706,82 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha) if (!this.TryWriteTempFile(activity, pack.PackStream, packTempPath, out packLength, out packFlushTask)) { bytesDownloaded += packLength; - return new RetryWrapper.CallbackResult(null, true); + allSucceeded = false; + break; } bytesDownloaded += packLength; // We can't trust the index file from the server, so we will always build our own. - // We still need to consume and handle any exceptions from the index stream though. - var canContinue = true; - GitProcess.Result result; - if (this.TryBuildIndex(activity, packTempPath, out result, gitProcess)) + // For performance, we run the index build in the background while we continue downloading the next pack. + var indexTask = Task.Run(async () => { - tempPacks.Add(new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null)); - if (pack.IndexStream != null) + // GitProcess only permits one process per instance at a time, so we need to duplicate it to run the index build in parallel. + // This is safe because each process is only accessing the pack file we direct it to which is not yet part + // of the enlistment. + GitProcess gitProcessForIndex = new GitProcess(this.Enlistment); + if (this.TryBuildIndex(activity, packTempPath, out var _, gitProcessForIndex)) { - try + return new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null); + } + else + { + await packFlushTask; + return null; + } + }); + tempPacksTasks.Add(indexTask); + + // If the server provided an index stream, we still need to consume and handle any exceptions it even + // though we are otherwise ignoring it. + if (pack.IndexStream != null) + { + try + { + bytesDownloaded += pack.IndexStream.Length; + if (pack.IndexStream.CanSeek) { - bytesDownloaded += pack.IndexStream.Length; - if (pack.IndexStream.CanSeek) - { - pack.IndexStream.Seek(0, SeekOrigin.End); - } - else - { - pack.IndexStream.CopyTo(Stream.Null); - } + pack.IndexStream.Seek(0, SeekOrigin.End); } - catch (Exception e) + else { - canContinue = false; - EventMetadata metadata = CreateEventMetadata(e); - activity.RelatedWarning(metadata, "Failed to read to end of index stream"); + pack.IndexStream.CopyTo(Stream.Null); } } + catch (Exception e) + { + EventMetadata metadata = CreateEventMetadata(e); + activity.RelatedWarning(metadata, "Failed to read to end of index stream"); + allSucceeded = false; + break; + } } - else + } + + // Wait for the index tasks to complete. If any fail, we still copy the prior successful ones + // to the pack folder so that the retry will be incremental from where the failure occurred. + var tempPacks = new List(); + bool indexTasksSucceededSoFar = true; + foreach (var task in tempPacksTasks) + { + TempPrefetchPackAndIdx tempPack = task.Result; + if (tempPack != null && indexTasksSucceededSoFar) { - canContinue = false; + tempPacks.Add(tempPack); } - - if (!canContinue) + else { - if (packFlushTask != null) - { - packFlushTask.Wait(); - } - - // Move whatever has been successfully downloaded so far - Exception moveException; - this.TryFlushAndMoveTempPacks(tempPacks, ref latestTimestamp, out moveException); - - return new RetryWrapper.CallbackResult(null, true); + indexTasksSucceededSoFar = false; + tempPack?.PackFlushTask.Wait(); + break; } } + allSucceeded = allSucceeded && indexTasksSucceededSoFar; Exception exception = null; if (!this.TryFlushAndMoveTempPacks(tempPacks, ref latestTimestamp, out exception)) { - return new RetryWrapper.CallbackResult(exception, true); + allSucceeded = false; } foreach (TempPrefetchPackAndIdx tempPack in tempPacks) @@ -766,8 +789,15 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha) packIndexes.Add(tempPack.IdxName); } - return new RetryWrapper.CallbackResult( - new GitObjectsHttpRequestor.GitObjectTaskResult(success: true)); + if (allSucceeded) + { + return new RetryWrapper.CallbackResult( + new GitObjectsHttpRequestor.GitObjectTaskResult(success: true)); + } + else + { + return new RetryWrapper.CallbackResult(exception, shouldRetry: true); + } } } diff --git a/GVFS/GVFS.Common/Git/GitProcess.cs b/GVFS/GVFS.Common/Git/GitProcess.cs index cda01f34a3..eb79df6a76 100644 --- a/GVFS/GVFS.Common/Git/GitProcess.cs +++ b/GVFS/GVFS.Common/Git/GitProcess.cs @@ -580,7 +580,11 @@ public Result VerifyCommitGraph(string objectDir) public Result IndexPack(string packfilePath, string idxOutputPath) { - return this.InvokeGitAgainstDotGitFolder($"index-pack -o \"{idxOutputPath}\" \"{packfilePath}\""); + /* Git's default thread count is Environment.ProcessorCount / 2, with a maximum of 20. + * Testing shows that we can get a 5% decrease in gvfs clone time for large repositories by using more threads, but + * we won't go over ProcessorCount or 20. */ + var threadCount = Math.Min(Environment.ProcessorCount, 20); + return this.InvokeGitAgainstDotGitFolder($"index-pack --threads={threadCount} -o \"{idxOutputPath}\" \"{packfilePath}\""); } /// From 81c36e132958b256d6e3826fc4de880be60c4cba Mon Sep 17 00:00:00 2001 From: Tyrie Vella Date: Thu, 19 Jun 2025 14:40:50 -0700 Subject: [PATCH 3/4] Remove trust of received pack indexes with config --- GVFS/GVFS.Common/GVFSConstants.cs | 4 ++ GVFS/GVFS.Common/Git/GitObjects.cs | 108 ++++++++++++++++++++--------- GVFS/GVFS.Common/Git/GitProcess.cs | 8 ++- 3 files changed, 87 insertions(+), 33 deletions(-) diff --git a/GVFS/GVFS.Common/GVFSConstants.cs b/GVFS/GVFS.Common/GVFSConstants.cs index 4872852014..a6e619ff7c 100644 --- a/GVFS/GVFS.Common/GVFSConstants.cs +++ b/GVFS/GVFS.Common/GVFSConstants.cs @@ -37,6 +37,10 @@ public static class GitConfig public const string GVFSTelemetryPipe = GitConfig.GVFSPrefix + "telemetry-pipe"; public const string IKey = GitConfig.GVFSPrefix + "ikey"; public const string HooksExtension = ".hooks"; + + /* Intended to be a temporary config to allow testing of distrusting pack indexes from cache server + * before it is enabled by default. */ + public const string TrustPackIndexes = GVFSPrefix + "trust-pack-indexes"; } public static class LocalGVFSConfig diff --git a/GVFS/GVFS.Common/Git/GitObjects.cs b/GVFS/GVFS.Common/Git/GitObjects.cs index 3a4f4a1848..42dab63877 100644 --- a/GVFS/GVFS.Common/Git/GitObjects.cs +++ b/GVFS/GVFS.Common/Git/GitObjects.cs @@ -131,11 +131,23 @@ public virtual bool TryDownloadPrefetchPacks(GitProcess gitProcess, long latestT { long bytesDownloaded = 0; + /* Distrusting the indexes from the server is a security feature to prevent a compromised server from sending a + * pack file and an index file that do not match. + * Eventually we will make this the default, but it has a high performance cost for the first prefetch after + * cloning a large repository, so it must be explicitly enabled for now. */ + bool trustPackIndexes = true; + if (gitProcess.TryGetFromConfig(GVFSConstants.GitConfig.TrustPackIndexes, forceOutsideEnlistment: false, out var valueString) + && bool.TryParse(valueString, out var trustPackIndexesConfig)) + { + trustPackIndexes = trustPackIndexesConfig; + } + metadata.Add("trustPackIndexes", trustPackIndexes); + long requestId = HttpRequestor.GetNewRequestId(); List innerPackIndexes = null; RetryWrapper.InvocationResult result = this.GitObjectRequestor.TrySendProtocolRequest( requestId: requestId, - onSuccess: (tryCount, response) => this.DeserializePrefetchPacks(response, ref latestTimestamp, ref bytesDownloaded, ref innerPackIndexes, gitProcess), + onSuccess: (tryCount, response) => this.DeserializePrefetchPacks(response, ref latestTimestamp, ref bytesDownloaded, ref innerPackIndexes, gitProcess, trustPackIndexes), onFailure: RetryWrapper.StandardErrorHandler(activity, requestId, "TryDownloadPrefetchPacks"), method: HttpMethod.Get, endPointGenerator: () => new Uri( @@ -394,6 +406,7 @@ public virtual GitProcess.Result IndexPackFile(string packfilePath, GitProcess g gitProcess = new GitProcess(this.Enlistment); } + GitProcess.Result result = gitProcess.IndexPack(packfilePath, tempIdxPath); if (result.ExitCodeIsFailure) { @@ -662,7 +675,8 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha) ref long latestTimestamp, ref long bytesDownloaded, ref List packIndexes, - GitProcess gitProcess) + GitProcess gitProcess, + bool trustPackIndexes) { if (packIndexes == null) { @@ -712,49 +726,58 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha) bytesDownloaded += packLength; - // We can't trust the index file from the server, so we will always build our own. - // For performance, we run the index build in the background while we continue downloading the next pack. - var indexTask = Task.Run(async () => + if (trustPackIndexes + && pack.IndexStream != null) { - // GitProcess only permits one process per instance at a time, so we need to duplicate it to run the index build in parallel. - // This is safe because each process is only accessing the pack file we direct it to which is not yet part - // of the enlistment. - GitProcess gitProcessForIndex = new GitProcess(this.Enlistment); - if (this.TryBuildIndex(activity, packTempPath, out var _, gitProcessForIndex)) + // The server provided an index stream, we can trust it, and were able to read it successfully, so we just need to wait for the index to flush. + if (this.TryWriteTempFile(activity, pack.IndexStream, idxTempPath, out var indexLength, out var indexFlushTask)) { - return new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null); + bytesDownloaded += indexLength; + tempPacksTasks.Add(Task.FromResult( + new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, indexFlushTask))); } else { - await packFlushTask; - return null; + bytesDownloaded += indexLength; + // we can try to build the index ourself, and if it's successful then on the retry we can pick up from that point. + var indexTask = StartPackIndexAsync(activity, pack, packName, packTempPath, idxName, idxTempPath, packFlushTask); + tempPacksTasks.Add(indexTask); + // but we need to stop trying to read from the download stream as that has failed. + allSucceeded = false; + break; } - }); - tempPacksTasks.Add(indexTask); - - // If the server provided an index stream, we still need to consume and handle any exceptions it even - // though we are otherwise ignoring it. - if (pack.IndexStream != null) + } + else { - try + // Either we can't trust the index file from the server, or the server didn't provide one, so we will build our own. + // For performance, we run the index build in the background while we continue downloading the next pack. + var indexTask = StartPackIndexAsync(activity, pack, packName, packTempPath, idxName, idxTempPath, packFlushTask); + tempPacksTasks.Add(indexTask); + + // If the server provided an index stream, we still need to consume and handle any exceptions it even + // though we are otherwise ignoring it. + if (pack.IndexStream != null) { - bytesDownloaded += pack.IndexStream.Length; - if (pack.IndexStream.CanSeek) + try { - pack.IndexStream.Seek(0, SeekOrigin.End); + bytesDownloaded += pack.IndexStream.Length; + if (pack.IndexStream.CanSeek) + { + pack.IndexStream.Seek(0, SeekOrigin.End); + } + else + { + pack.IndexStream.CopyTo(Stream.Null); + } } - else + catch (Exception e) { - pack.IndexStream.CopyTo(Stream.Null); + EventMetadata metadata = CreateEventMetadata(e); + activity.RelatedWarning(metadata, "Failed to read to end of index stream"); + allSucceeded = false; + break; } } - catch (Exception e) - { - EventMetadata metadata = CreateEventMetadata(e); - activity.RelatedWarning(metadata, "Failed to read to end of index stream"); - allSucceeded = false; - break; - } } } @@ -801,6 +824,27 @@ private LooseObjectToWrite GetLooseObjectDestination(string sha) } } + private Task StartPackIndexAsync(ITracer activity, PrefetchPacksDeserializer.PackAndIndex pack, string packName, string packTempPath, string idxName, string idxTempPath, Task packFlushTask) + { + var indexTask = Task.Run(async () => + { + // GitProcess only permits one process per instance at a time, so we need to duplicate it to run the index build in parallel. + // This is safe because each process is only accessing the pack file we direct it to which is not yet part + // of the enlistment. + GitProcess gitProcessForIndex = new GitProcess(this.Enlistment); + if (this.TryBuildIndex(activity, packTempPath, out var _, gitProcessForIndex)) + { + return new TempPrefetchPackAndIdx(pack.Timestamp, packName, packTempPath, packFlushTask, idxName, idxTempPath, idxFlushTask: null); + } + else + { + await packFlushTask; + return null; + } + }); + return indexTask; + } + private bool TryFlushAndMoveTempPacks(List tempPacks, ref long latestTimestamp, out Exception exception) { exception = null; diff --git a/GVFS/GVFS.Common/Git/GitProcess.cs b/GVFS/GVFS.Common/Git/GitProcess.cs index eb79df6a76..6b31c4f503 100644 --- a/GVFS/GVFS.Common/Git/GitProcess.cs +++ b/GVFS/GVFS.Common/Git/GitProcess.cs @@ -584,7 +584,13 @@ public Result IndexPack(string packfilePath, string idxOutputPath) * Testing shows that we can get a 5% decrease in gvfs clone time for large repositories by using more threads, but * we won't go over ProcessorCount or 20. */ var threadCount = Math.Min(Environment.ProcessorCount, 20); - return this.InvokeGitAgainstDotGitFolder($"index-pack --threads={threadCount} -o \"{idxOutputPath}\" \"{packfilePath}\""); + string command = $"index-pack --threads={threadCount} -o \"{idxOutputPath}\" \"{packfilePath}\""; + + // If index-pack is invoked within an enlistment, then it reads all the other objects and pack indexes available + // in the enlistment in order to verify references from within this pack file, even if --verify or similar + // options are not passed. + // Since we aren't verifying, we invoke index-pack outside the enlistment for performance. + return this.InvokeGitOutsideEnlistment(command); } /// From 54f5bfe45ff4e1a8465e120e1daad27e67817cf4 Mon Sep 17 00:00:00 2001 From: Tyrie Vella Date: Mon, 23 Jun 2025 09:18:59 -0700 Subject: [PATCH 4/4] Remove extra newline --- GVFS/GVFS.Common/Git/GitObjects.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/GVFS/GVFS.Common/Git/GitObjects.cs b/GVFS/GVFS.Common/Git/GitObjects.cs index 42dab63877..6614bd5734 100644 --- a/GVFS/GVFS.Common/Git/GitObjects.cs +++ b/GVFS/GVFS.Common/Git/GitObjects.cs @@ -406,7 +406,6 @@ public virtual GitProcess.Result IndexPackFile(string packfilePath, GitProcess g gitProcess = new GitProcess(this.Enlistment); } - GitProcess.Result result = gitProcess.IndexPack(packfilePath, tempIdxPath); if (result.ExitCodeIsFailure) {