From 5bbde892ba65888b7c652d4b7cbed849c45520be Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Mon, 31 Mar 2025 23:55:08 -0400 Subject: [PATCH 01/13] Adding support for contributing using Windows. --- CONTRIBUTING.md | 11 ++++-- Makefile.win | 51 +++++++++++++++++++++++++++ script/bootstrap.ps1 | 18 ++++++++++ script/ensure-go-installed.ps1 | 64 ++++++++++++++++++++++++++++++++++ script/go.ps1 | 21 +++++++++++ 5 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 Makefile.win create mode 100644 script/bootstrap.ps1 create mode 100644 script/ensure-go-installed.ps1 create mode 100644 script/go.ps1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f4427e8..dfb3a7b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,8 +11,15 @@ Please note that this project is released with a [Contributor Code of Conduct][c ## Submitting a pull request 1. [Fork][fork] and clone the repository -2. Configure and install the dependencies: `script/bootstrap` -3. Make sure the tests pass on your machine: `make test` + + + +2. Configure and install the dependencies + - On Unix-y machines: `script/bootstrap` + - On Windows: `script/bootstrap.ps1` (requires PowerShell 7+) +3. Make sure the tests pass on your machine + - On Unix-y machines: `make test` + - On Windows machines: `make -f Makefile.win test` (because there's a different Makefile when building on Windows) 4. Create a new branch: `git checkout -b my-branch-name` 5. Make your change, add tests, and make sure the tests still pass 6. Push to your fork and [submit a pull request][pr] diff --git a/Makefile.win b/Makefile.win new file mode 100644 index 0000000..532a67b --- /dev/null +++ b/Makefile.win @@ -0,0 +1,51 @@ +# Windows-specific Makefile for git-sizer designed for PowerShell + +PACKAGE := github.com/github/git-sizer +GO111MODULES := 1 + +# Use the project's go wrapper script via the -File parameter to avoid loading your profile +GOSCRIPT := $(CURDIR)/script/go.ps1 +# GOSCRIPTCORRECTED := $(shell pwsh.exe -NoProfile -ExecutionPolicy Bypass -Command "$(GOSCRIPT) -replace '/', '\'") +GO := pwsh.exe -NoProfile -ExecutionPolicy Bypass -File $(GOSCRIPT) + +# Get the build version from git using try/catch instead of "||" +BUILD_VERSION := $(shell pwsh.exe -NoProfile -ExecutionPolicy Bypass -Command "try { git describe --tags --always --dirty 2>$null } catch { Write-Output 'unknown' }") +LDFLAGS := -X github.com/github/git-sizer/main.BuildVersion=$(BUILD_VERSION) +GOFLAGS := -mod=readonly + +ifdef USE_ISATTY +GOFLAGS := $(GOFLAGS) --tags isatty +endif + +# Default target +all: bin/git-sizer.exe + +# Main binary target +bin/git-sizer.exe: + @powershell -NoProfile -ExecutionPolicy Bypass -Command "if (-not (Test-Path bin)) { New-Item -ItemType Directory -Path bin | Out-Null }" + $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o .\bin\git-sizer.exe . + +# Test target +test: bin/git-sizer.exe gotest + +# Run go tests +gotest: + $(GO) test -timeout 60s $(GOFLAGS) -ldflags "$(LDFLAGS)" ./... + +# Clean up builds +clean: + @powershell -NoProfile -ExecutionPolicy Bypass -Command "if (Test-Path bin) { Remove-Item -Recurse -Force bin }" + +# Help target +help: + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host 'Windows Makefile for git-sizer' -ForegroundColor Cyan" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ''" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host 'Targets:' -ForegroundColor Green" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' all - Build git-sizer (default)'" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' test - Run tests'" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' clean - Clean build artifacts'" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ''" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host 'Example usage:' -ForegroundColor Green" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' nmake -f Makefile.win'" + @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' nmake -f Makefile.win test'" + \ No newline at end of file diff --git a/script/bootstrap.ps1 b/script/bootstrap.ps1 new file mode 100644 index 0000000..7f0413a --- /dev/null +++ b/script/bootstrap.ps1 @@ -0,0 +1,18 @@ +#!/usr/bin/env pwsh + +# Exit immediately if any command fails +$ErrorActionPreference = "Stop" + +# Change directory to the parent directory of the script +Set-Location -Path (Split-Path -Parent $PSCommandPath | Split-Path -Parent) + +# Set ROOTDIR environment variable to the current directory +$env:ROOTDIR = (Get-Location).Path + +# Check if the operating system is macOS +if ($IsMacOS) { + brew bundle +} + +# Source the ensure-go-installed.ps1 script +. ./script/ensure-go-installed.ps1 \ No newline at end of file diff --git a/script/ensure-go-installed.ps1 b/script/ensure-go-installed.ps1 new file mode 100644 index 0000000..5479eec --- /dev/null +++ b/script/ensure-go-installed.ps1 @@ -0,0 +1,64 @@ +# This script is meant to be sourced with ROOTDIR set. + +if (-not $env:ROOTDIR) { + Write-Error 'ensure-go-installed.ps1 invoked without ROOTDIR set!' +} + +# Function to check if Go is installed and at least version 1.21 +function GoOk { + $goVersionOutput = & go version 2>$null + if ($goVersionOutput) { + $goVersion = $goVersionOutput -match 'go(\d+)\.(\d+)' | Out-Null + $majorVersion = [int]$Matches[1] + $minorVersion = [int]$Matches[2] + return ($majorVersion -eq 1 -and $minorVersion -ge 21) + } + return $false +} + +# Function to set up a local Go installation if available +function SetUpVendoredGo { + $GO_VERSION = "go1.23.7" + $VENDORED_GOROOT = Join-Path -Path $env:ROOTDIR -ChildPath "vendor/$GO_VERSION/go" + if (Test-Path -Path "$VENDORED_GOROOT/bin/go") { + $env:GOROOT = $VENDORED_GOROOT + $env:PATH = "$env:GOROOT/bin;$env:PATH" + } +} + +# Function to check if Make is installed and install it if needed +function EnsureMakeInstalled { + $makeInstalled = $null -ne (Get-Command "make" -ErrorAction SilentlyContinue) + if (-not $makeInstalled) { + #Write-Host "Installing Make using winget..." + winget install --no-upgrade --nowarn -e --id GnuWin32.Make + if ($LASTEXITCODE -ne 0 -and $LASTEXITCODE -ne 0x8A150061) { + Write-Error "Failed to install Make. Please install it manually. Exit code: $LASTEXITCODE" + } + # Refresh PATH to include the newly installed Make + $env:PATH = [System.Environment]::GetEnvironmentVariable("PATH", "Machine") + ";" + [System.Environment]::GetEnvironmentVariable("PATH", "User") + } + + # Add GnuWin32 bin directory directly to the PATH + $gnuWin32Path = "C:\Program Files (x86)\GnuWin32\bin" + if (Test-Path -Path $gnuWin32Path) { + $env:PATH = "$gnuWin32Path;$env:PATH" + } else { + Write-Host "Couldn't find GnuWin32 bin directory at the expected location." + # Also refresh PATH from environment variables as a fallback + $env:PATH = [System.Environment]::GetEnvironmentVariable("PATH", "Machine") + ";" + [System.Environment]::GetEnvironmentVariable("PATH", "User") + } +} + +SetUpVendoredGo + +if (-not (GoOk)) { + & ./script/install-vendored-go >$null + if ($LASTEXITCODE -ne 0) { + exit 1 + } + SetUpVendoredGo +} + +# Ensure Make is installed +EnsureMakeInstalled diff --git a/script/go.ps1 b/script/go.ps1 new file mode 100644 index 0000000..e2314b8 --- /dev/null +++ b/script/go.ps1 @@ -0,0 +1,21 @@ +# Ensure that script errors stop execution +$ErrorActionPreference = "Stop" + +# Determine the root directory of the project. +$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$ROOTDIR = (Resolve-Path (Join-Path $scriptDir "..")).Path + +# Source the ensure-go-installed functionality. +# (This assumes you have a corresponding PowerShell version of ensure-go-installed. +# If not, you could call the bash version via bash.exe if available.) +$ensureScript = Join-Path $ROOTDIR "script\ensure-go-installed.ps1" +if (Test-Path $ensureScript) { + . $ensureScript +} else { + Write-Error "Unable to locate '$ensureScript'. Please provide a PowerShell version of ensure-go-installed." +} + +# Execute the actual 'go' command with passed arguments. +# This re-invokes the Go tool in PATH. +$goExe = "go" +& $goExe @args \ No newline at end of file From 25b2390918919689421dee37cabee540ae051857 Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Tue, 1 Apr 2025 16:26:43 -0400 Subject: [PATCH 02/13] Added compressed size and updated a few labels. --- Makefile.win | 19 +++++++++++-------- git-sizer.go | 13 +++++++++++++ git/git.go | 7 +++++-- sizes/dirsize.go | 28 ++++++++++++++++++++++++++++ sizes/output.go | 19 +++++++++++++------ sizes/sizes.go | 3 +++ 6 files changed, 73 insertions(+), 16 deletions(-) create mode 100644 sizes/dirsize.go diff --git a/Makefile.win b/Makefile.win index 532a67b..ec6f208 100644 --- a/Makefile.win +++ b/Makefile.win @@ -5,11 +5,10 @@ GO111MODULES := 1 # Use the project's go wrapper script via the -File parameter to avoid loading your profile GOSCRIPT := $(CURDIR)/script/go.ps1 -# GOSCRIPTCORRECTED := $(shell pwsh.exe -NoProfile -ExecutionPolicy Bypass -Command "$(GOSCRIPT) -replace '/', '\'") GO := pwsh.exe -NoProfile -ExecutionPolicy Bypass -File $(GOSCRIPT) # Get the build version from git using try/catch instead of "||" -BUILD_VERSION := $(shell pwsh.exe -NoProfile -ExecutionPolicy Bypass -Command "try { git describe --tags --always --dirty 2>$null } catch { Write-Output 'unknown' }") +BUILD_VERSION := $(shell pwsh.exe -NoProfile -ExecutionPolicy Bypass -Command "try { git describe --tags --always --dirty 2>$$null } catch { Write-Output 'unknown' }") LDFLAGS := -X github.com/github/git-sizer/main.BuildVersion=$(BUILD_VERSION) GOFLAGS := -mod=readonly @@ -17,16 +16,21 @@ ifdef USE_ISATTY GOFLAGS := $(GOFLAGS) --tags isatty endif +# Find all Go source files +GO_SRC_FILES := $(shell powershell -NoProfile -ExecutionPolicy Bypass -Command "Get-ChildItem -Path . -Filter *.go -Recurse | Select-Object -ExpandProperty FullName") + # Default target all: bin/git-sizer.exe -# Main binary target -bin/git-sizer.exe: +# Main binary target - depend on all Go source files +bin/git-sizer.exe: $(GO_SRC_FILES) @powershell -NoProfile -ExecutionPolicy Bypass -Command "if (-not (Test-Path bin)) { New-Item -ItemType Directory -Path bin | Out-Null }" - $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -o .\bin\git-sizer.exe . + $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -a -o .\bin\git-sizer.exe . -# Test target -test: bin/git-sizer.exe gotest +# Test target - explicitly run the build first to ensure binary is up to date +test: + @$(MAKE) -f Makefile.win bin/git-sizer.exe + @$(MAKE) -f Makefile.win gotest # Run go tests gotest: @@ -48,4 +52,3 @@ help: @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host 'Example usage:' -ForegroundColor Green" @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' nmake -f Makefile.win'" @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' nmake -f Makefile.win test'" - \ No newline at end of file diff --git a/git-sizer.go b/git-sizer.go index 1ef9812..b78dc5f 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -331,6 +331,19 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return fmt.Errorf("error scanning repository: %w", err) } + // Calculate the actual size of the .git directory + gitDir, err := repo.GitDir() + if err != nil { + return fmt.Errorf("error getting Git directory path: %w", err) + } + + gitDirSize, err := sizes.CalculateGitDirSize(gitDir) + if err != nil { + return fmt.Errorf("error calculating Git directory size: %w", err) + } + + historySize.GitDirSize = gitDirSize + if jsonOutput { var j []byte var err error diff --git a/git/git.go b/git/git.go index 096ce81..5fc8f34 100644 --- a/git/git.go +++ b/git/git.go @@ -150,8 +150,11 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { // GitDir returns the path to `repo`'s `GIT_DIR`. It might be absolute // or it might be relative to the current directory. -func (repo *Repository) GitDir() string { - return repo.gitDir +func (repo *Repository) GitDir() (string, error) { + if repo.gitDir == "" { + return "", errors.New("gitDir is not set") + } + return repo.gitDir, nil } // GitPath returns that path of a file within the git repository, by diff --git a/sizes/dirsize.go b/sizes/dirsize.go new file mode 100644 index 0000000..3b60ed2 --- /dev/null +++ b/sizes/dirsize.go @@ -0,0 +1,28 @@ +package sizes + +import ( + "os" + "path/filepath" + + "github.com/github/git-sizer/counts" +) + +// CalculateGitDirSize returns the total size in bytes of the .git directory +func CalculateGitDirSize(gitDir string) (counts.Count64, error) { + var totalSize counts.Count64 + + err := filepath.Walk(gitDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + // Skip files we can't access + return nil + } + + // Only count files, not directories + if !info.IsDir() { + totalSize.Increment(counts.Count64(info.Size())) + } + return nil + }) + + return totalSize, err +} diff --git a/sizes/output.go b/sizes/output.go index 933cc05..0538cb7 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -279,10 +279,10 @@ func (t *Threshold) Type() string { // A `pflag.Value` that can be used as a boolean option that sets a // `Threshold` variable to a fixed value. For example, // -// pflag.Var( -// sizes.NewThresholdFlagValue(&threshold, 30), -// "critical", "only report critical statistics", -// ) +// pflag.Var( +// sizes.NewThresholdFlagValue(&threshold, 30), +// "critical", "only report critical statistics", +// ) // // adds a `--critical` flag that sets `threshold` to 30. type thresholdFlagValue struct { @@ -492,7 +492,7 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { return S( "", S( - "Overall repository size", + "Repository statistics", S( "Commits", I("uniqueCommitCount", "Count", @@ -521,11 +521,18 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { I("uniqueBlobCount", "Count", "The total number of distinct blob objects", nil, s.UniqueBlobCount, metric, "", 1.5e6), - I("uniqueBlobSize", "Total size", + I("uniqueBlobSize", "Uncompressed total size", "The total size of all distinct blob objects", nil, s.UniqueBlobSize, binary, "B", 10e9), ), + S( + "On-disk size", + I("gitDirSize", "Compressed total size", + "The actual on-disk size of the .git directory", + nil, s.GitDirSize, binary, "B", 1e9), + ), + S( "Annotated tags", I("uniqueTagCount", "Count", diff --git a/sizes/sizes.go b/sizes/sizes.go index b3de0bc..4ed115c 100644 --- a/sizes/sizes.go +++ b/sizes/sizes.go @@ -210,6 +210,9 @@ type HistorySize struct { // The tree with the maximum expanded submodule count. MaxExpandedSubmoduleCountTree *Path `json:"max_expanded_submodule_count_tree,omitempty"` + + // The actual size of the .git directory on disk + GitDirSize counts.Count64 `json:"git_dir_size"` } // Convenience function: forget `*path` if it is non-nil and overwrite From ed4478883f640a350634847723c059b2bd6b2757 Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Wed, 2 Apr 2025 14:20:25 -0400 Subject: [PATCH 03/13] Updating CONTRIBUTING.md --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dfb3a7b..e0d3b51 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,10 +15,10 @@ Please note that this project is released with a [Contributor Code of Conduct][c 2. Configure and install the dependencies - - On Unix-y machines: `script/bootstrap` + - On Unix-like machines: `script/bootstrap` - On Windows: `script/bootstrap.ps1` (requires PowerShell 7+) 3. Make sure the tests pass on your machine - - On Unix-y machines: `make test` + - On Unix-like machines: `make test` - On Windows machines: `make -f Makefile.win test` (because there's a different Makefile when building on Windows) 4. Create a new branch: `git checkout -b my-branch-name` 5. Make your change, add tests, and make sure the tests still pass From 469485b19698604c0d22ad7560dee4a039ab5e65 Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Wed, 2 Apr 2025 14:44:54 -0400 Subject: [PATCH 04/13] Linter updates. --- git-sizer.go | 4 ++-- sizes/sizes.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index b78dc5f..0f44aad 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -336,12 +336,12 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st if err != nil { return fmt.Errorf("error getting Git directory path: %w", err) } - + gitDirSize, err := sizes.CalculateGitDirSize(gitDir) if err != nil { return fmt.Errorf("error calculating Git directory size: %w", err) } - + historySize.GitDirSize = gitDirSize if jsonOutput { diff --git a/sizes/sizes.go b/sizes/sizes.go index 4ed115c..73834fd 100644 --- a/sizes/sizes.go +++ b/sizes/sizes.go @@ -210,8 +210,8 @@ type HistorySize struct { // The tree with the maximum expanded submodule count. MaxExpandedSubmoduleCountTree *Path `json:"max_expanded_submodule_count_tree,omitempty"` - - // The actual size of the .git directory on disk + + // The actual size of the .git directory on disk. GitDirSize counts.Count64 `json:"git_dir_size"` } From 56839e56669ba8498e3ae5a9eb85033518fec3da Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Wed, 2 Apr 2025 14:49:41 -0400 Subject: [PATCH 05/13] More linter updates. --- git-sizer.go | 2 +- sizes/dirsize.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 0f44aad..fe5876b 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -331,7 +331,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return fmt.Errorf("error scanning repository: %w", err) } - // Calculate the actual size of the .git directory + // Calculate the actual size of the .git directory. gitDir, err := repo.GitDir() if err != nil { return fmt.Errorf("error getting Git directory path: %w", err) diff --git a/sizes/dirsize.go b/sizes/dirsize.go index 3b60ed2..f490629 100644 --- a/sizes/dirsize.go +++ b/sizes/dirsize.go @@ -7,17 +7,17 @@ import ( "github.com/github/git-sizer/counts" ) -// CalculateGitDirSize returns the total size in bytes of the .git directory +// CalculateGitDirSize returns the total size in bytes of the .git directory. func CalculateGitDirSize(gitDir string) (counts.Count64, error) { var totalSize counts.Count64 err := filepath.Walk(gitDir, func(path string, info os.FileInfo, err error) error { if err != nil { - // Skip files we can't access + // Skip files we can't access. return nil } - // Only count files, not directories + // Only count files, not directories. if !info.IsDir() { totalSize.Increment(counts.Count64(info.Size())) } From 5d3b4abf0a77081573e636daf9083d36e4ecc1ec Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Wed, 2 Apr 2025 14:56:48 -0400 Subject: [PATCH 06/13] Linter updates. --- sizes/dirsize.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sizes/dirsize.go b/sizes/dirsize.go index f490629..eb9eb99 100644 --- a/sizes/dirsize.go +++ b/sizes/dirsize.go @@ -13,8 +13,11 @@ func CalculateGitDirSize(gitDir string) (counts.Count64, error) { err := filepath.Walk(gitDir, func(path string, info os.FileInfo, err error) error { if err != nil { - // Skip files we can't access. - return nil + // Only skip errors for files we cannot access. + if os.IsNotExist(err) || os.IsPermission(err) { + return nil + } + return err } // Only count files, not directories. From f6a16d7b7a0e309f0701519920469fcf41579c62 Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Mon, 7 Apr 2025 14:22:40 -0400 Subject: [PATCH 07/13] Simplified Makefile.win --- Makefile.win | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/Makefile.win b/Makefile.win index ec6f208..d37c9d7 100644 --- a/Makefile.win +++ b/Makefile.win @@ -19,36 +19,39 @@ endif # Find all Go source files GO_SRC_FILES := $(shell powershell -NoProfile -ExecutionPolicy Bypass -Command "Get-ChildItem -Path . -Filter *.go -Recurse | Select-Object -ExpandProperty FullName") +# Define common PowerShell command +PWSH := @powershell -NoProfile -ExecutionPolicy Bypass -Command + # Default target all: bin/git-sizer.exe # Main binary target - depend on all Go source files bin/git-sizer.exe: $(GO_SRC_FILES) - @powershell -NoProfile -ExecutionPolicy Bypass -Command "if (-not (Test-Path bin)) { New-Item -ItemType Directory -Path bin | Out-Null }" - $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -a -o .\bin\git-sizer.exe . + $(PWSH) "if (-not (Test-Path bin)) { New-Item -ItemType Directory -Path bin | Out-Null }" + $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -a -o .\bin\git-sizer.exe . # Test target - explicitly run the build first to ensure binary is up to date test: - @$(MAKE) -f Makefile.win bin/git-sizer.exe - @$(MAKE) -f Makefile.win gotest + @$(MAKE) -f Makefile.win bin/git-sizer.exe + @$(MAKE) -f Makefile.win gotest # Run go tests gotest: - $(GO) test -timeout 60s $(GOFLAGS) -ldflags "$(LDFLAGS)" ./... + $(GO) test -timeout 60s $(GOFLAGS) -ldflags "$(LDFLAGS)" ./... # Clean up builds clean: - @powershell -NoProfile -ExecutionPolicy Bypass -Command "if (Test-Path bin) { Remove-Item -Recurse -Force bin }" + $(PWSH) "if (Test-Path bin) { Remove-Item -Recurse -Force bin }" # Help target help: - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host 'Windows Makefile for git-sizer' -ForegroundColor Cyan" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ''" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host 'Targets:' -ForegroundColor Green" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' all - Build git-sizer (default)'" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' test - Run tests'" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' clean - Clean build artifacts'" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ''" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host 'Example usage:' -ForegroundColor Green" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' nmake -f Makefile.win'" - @powershell -NoProfile -ExecutionPolicy Bypass -Command "Write-Host ' nmake -f Makefile.win test'" + $(PWSH) "Write-Host 'Windows Makefile for git-sizer' -ForegroundColor Cyan" + $(PWSH) "Write-Host ''" + $(PWSH) "Write-Host 'Targets:' -ForegroundColor Green" + $(PWSH) "Write-Host ' all - Build git-sizer (default)'" + $(PWSH) "Write-Host ' test - Run tests'" + $(PWSH) "Write-Host ' clean - Clean build artifacts'" + $(PWSH) "Write-Host ''" + $(PWSH) "Write-Host 'Example usage:' -ForegroundColor Green" + $(PWSH) "Write-Host ' nmake -f Makefile.win'" + $(PWSH) "Write-Host ' nmake -f Makefile.win test'" From 5b5042832001ba8a03d17f03d43e5ae40196869d Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Mon, 7 Apr 2025 15:04:26 -0400 Subject: [PATCH 08/13] =?UTF-8?q?Repaired=20tabs=20in=20makefile=20?= =?UTF-8?q?=F0=9F=99=84;=20added=20variable=20set=20in=20script.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile.win | 32 ++++++++++++++++---------------- script/ensure-go-installed.ps1 | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Makefile.win b/Makefile.win index d37c9d7..b772251 100644 --- a/Makefile.win +++ b/Makefile.win @@ -27,31 +27,31 @@ all: bin/git-sizer.exe # Main binary target - depend on all Go source files bin/git-sizer.exe: $(GO_SRC_FILES) - $(PWSH) "if (-not (Test-Path bin)) { New-Item -ItemType Directory -Path bin | Out-Null }" - $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -a -o .\bin\git-sizer.exe . + $(PWSH) "if (-not (Test-Path bin)) { New-Item -ItemType Directory -Path bin | Out-Null }" + $(GO) build $(GOFLAGS) -ldflags "$(LDFLAGS)" -a -o .\bin\git-sizer.exe . # Test target - explicitly run the build first to ensure binary is up to date test: - @$(MAKE) -f Makefile.win bin/git-sizer.exe - @$(MAKE) -f Makefile.win gotest + @$(MAKE) -f Makefile.win bin/git-sizer.exe + @$(MAKE) -f Makefile.win gotest # Run go tests gotest: - $(GO) test -timeout 60s $(GOFLAGS) -ldflags "$(LDFLAGS)" ./... + $(GO) test -timeout 60s $(GOFLAGS) -ldflags "$(LDFLAGS)" ./... # Clean up builds clean: - $(PWSH) "if (Test-Path bin) { Remove-Item -Recurse -Force bin }" + $(PWSH) "if (Test-Path bin) { Remove-Item -Recurse -Force bin }" # Help target help: - $(PWSH) "Write-Host 'Windows Makefile for git-sizer' -ForegroundColor Cyan" - $(PWSH) "Write-Host ''" - $(PWSH) "Write-Host 'Targets:' -ForegroundColor Green" - $(PWSH) "Write-Host ' all - Build git-sizer (default)'" - $(PWSH) "Write-Host ' test - Run tests'" - $(PWSH) "Write-Host ' clean - Clean build artifacts'" - $(PWSH) "Write-Host ''" - $(PWSH) "Write-Host 'Example usage:' -ForegroundColor Green" - $(PWSH) "Write-Host ' nmake -f Makefile.win'" - $(PWSH) "Write-Host ' nmake -f Makefile.win test'" + $(PWSH) "Write-Host 'Windows Makefile for git-sizer' -ForegroundColor Cyan" + $(PWSH) "Write-Host ''" + $(PWSH) "Write-Host 'Targets:' -ForegroundColor Green" + $(PWSH) "Write-Host ' all - Build git-sizer (default)'" + $(PWSH) "Write-Host ' test - Run tests'" + $(PWSH) "Write-Host ' clean - Clean build artifacts'" + $(PWSH) "Write-Host ''" + $(PWSH) "Write-Host 'Example usage:' -ForegroundColor Green" + $(PWSH) "Write-Host ' make -f Makefile.win'" + $(PWSH) "Write-Host ' make -f Makefile.win test'" diff --git a/script/ensure-go-installed.ps1 b/script/ensure-go-installed.ps1 index 5479eec..3d653c1 100644 --- a/script/ensure-go-installed.ps1 +++ b/script/ensure-go-installed.ps1 @@ -1,7 +1,7 @@ # This script is meant to be sourced with ROOTDIR set. if (-not $env:ROOTDIR) { - Write-Error 'ensure-go-installed.ps1 invoked without ROOTDIR set!' + $env:ROOTDIR = (Resolve-Path (Join-Path $scriptDir "..")).Path } # Function to check if Go is installed and at least version 1.21 From d9da39ee5482e7b63977492f5720c1f8846bbabb Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Tue, 15 Apr 2025 17:46:39 -0700 Subject: [PATCH 09/13] Added --include-unreachable flag and related processing and output. --- git-sizer.go | 14 +++++++ git/git.go | 97 ++++++++++++++++++++++++++++++++++++++++++++++++- sizes/output.go | 20 ++++++++-- sizes/sizes.go | 8 ++++ 4 files changed, 134 insertions(+), 5 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index fe5876b..3e6d870 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -13,6 +13,7 @@ import ( "github.com/spf13/pflag" + "github.com/github/git-sizer/counts" "github.com/github/git-sizer/git" "github.com/github/git-sizer/internal/refopts" "github.com/github/git-sizer/isatty" @@ -46,6 +47,7 @@ const usage = `usage: git-sizer [OPTS] [ROOT...] gitconfig: 'sizer.jsonVersion'. --[no-]progress report (don't report) progress to stderr. Can be set via gitconfig: 'sizer.progress'. + --include-unreachable include unreachable objects --version only report the git-sizer version number Object selection: @@ -131,6 +133,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st var progress bool var version bool var showRefs bool + var includeUnreachable bool // Try to open the repository, but it's not an error yet if this // fails, because the user might only be asking for `--help`. @@ -207,6 +210,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st rgb.AddRefopts(flags) flags.BoolVar(&showRefs, "show-refs", false, "list the references being processed") + flags.BoolVar(&includeUnreachable, "include-unreachable", false, "include unreachable objects") flags.SortFlags = false @@ -344,6 +348,16 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st historySize.GitDirSize = gitDirSize + // Get unreachable object stats and add to output if requested + if includeUnreachable { + historySize.ShowUnreachable = true + unreachableStats, err := repo.GetUnreachableStats() + if err == nil { + historySize.UnreachableObjectCount = counts.Count32(unreachableStats.Count) + historySize.UnreachableObjectSize = counts.Count64(unreachableStats.Size) + } + } + if jsonOutput { var j []byte var err error diff --git a/git/git.go b/git/git.go index 5fc8f34..fc57bd7 100644 --- a/git/git.go +++ b/git/git.go @@ -1,13 +1,16 @@ package git import ( + "bufio" "bytes" "errors" "fmt" + "io" "io/fs" "os" "os/exec" "path/filepath" + "strings" ) // ObjectType represents the type of a Git object ("blob", "tree", @@ -157,7 +160,7 @@ func (repo *Repository) GitDir() (string, error) { return repo.gitDir, nil } -// GitPath returns that path of a file within the git repository, by +// GitPath returns the path of a file within the git repository, by // calling `git rev-parse --git-path $relPath`. The returned path is // relative to the current directory. func (repo *Repository) GitPath(relPath string) (string, error) { @@ -173,3 +176,95 @@ func (repo *Repository) GitPath(relPath string) (string, error) { // current directory, we can use it as-is: return string(bytes.TrimSpace(out)), nil } + +// UnreachableStats holds the count and size of unreachable objects. +type UnreachableStats struct { + Count int64 + Size int64 +} + +// GetUnreachableStats runs 'git fsck --unreachable --no-reflogs --full' +// and returns the count and total size of unreachable objects. +// This implementation collects all OIDs from fsck output and then uses +// batch mode to efficiently retrieve their sizes. +func (repo *Repository) GetUnreachableStats() (UnreachableStats, error) { + // Run git fsck. Using CombinedOutput captures both stdout and stderr. + cmd := exec.Command(repo.gitBin, "-C", repo.gitDir, "fsck", "--unreachable", "--no-reflogs", "--full") + cmd.Env = os.Environ() + output, err := cmd.CombinedOutput() + if err != nil { + fmt.Fprintln(os.Stderr) + fmt.Fprintln(os.Stderr, "An error occurred trying to process unreachable objects.") + os.Stderr.Write(output) + fmt.Fprintln(os.Stderr) + return UnreachableStats{Count: 0, Size: 0}, err + } + + var oids []string + count := int64(0) + for _, line := range bytes.Split(output, []byte{'\n'}) { + fields := bytes.Fields(line) + // Expected line format: "unreachable ..." + if len(fields) >= 3 && string(fields[0]) == "unreachable" { + count++ + oid := string(fields[2]) + oids = append(oids, oid) + } + } + + // Retrieve the total size using batch mode. + totalSize, err := repo.getTotalSizeFromOids(oids) + if err != nil { + return UnreachableStats{}, fmt.Errorf("failed to get sizes via batch mode: %w", err) + } + + return UnreachableStats{Count: count, Size: totalSize}, nil +} + +// getTotalSizeFromOids uses 'git cat-file --batch-check' to retrieve sizes for +// the provided OIDs. It writes each OID to stdin and reads back lines in the +// format: " ". +func (repo *Repository) getTotalSizeFromOids(oids []string) (int64, error) { + cmd := exec.Command(repo.gitBin, "-C", repo.gitDir, "cat-file", "--batch-check") + stdinPipe, err := cmd.StdinPipe() + if err != nil { + return 0, fmt.Errorf("failed to get stdin pipe: %w", err) + } + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + return 0, fmt.Errorf("failed to get stdout pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + return 0, fmt.Errorf("failed to start git cat-file batch: %w", err) + } + + // Write all OIDs to the batch process. + go func() { + defer stdinPipe.Close() + for _, oid := range oids { + io.WriteString(stdinPipe, oid+"\n") + } + }() + + var totalSize int64 + scanner := bufio.NewScanner(stdoutPipe) + // Each line is expected to be: " " + for scanner.Scan() { + parts := strings.Fields(scanner.Text()) + if len(parts) == 3 { + var size int64 + fmt.Sscanf(parts[2], "%d", &size) + totalSize += size + } else { + return 0, fmt.Errorf("unexpected output format: %s", scanner.Text()) + } + } + if err := scanner.Err(); err != nil { + return 0, fmt.Errorf("error reading git cat-file output: %w", err) + } + if err := cmd.Wait(); err != nil { + return 0, fmt.Errorf("git cat-file batch process error: %w", err) + } + return totalSize, nil +} diff --git a/sizes/output.go b/sizes/output.go index 0538cb7..041bbbe 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -489,8 +489,7 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { rgis = append(rgis, rgi.Indented(indent)) } - return S( - "", + sections := []tableContents{ S( "Repository statistics", S( @@ -532,7 +531,6 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { "The actual on-disk size of the .git directory", nil, s.GitDirSize, binary, "B", 1e9), ), - S( "Annotated tags", I("uniqueTagCount", "Count", @@ -610,5 +608,19 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { "The maximum number of submodules in any checkout", s.MaxExpandedSubmoduleCountTree, s.MaxExpandedSubmoduleCount, metric, "", 100), ), - ) + } + + if s.ShowUnreachable { + sections = append(sections, S( + "Unreachable objects", + I("unreachableObjectCount", "Count", + "The total number of unreachable objects in the repository", + nil, s.UnreachableObjectCount, metric, "", 1e7), + I("unreachableObjectSize", "Uncompressed total size", + "The total size of unreachable objects in the repository", + nil, s.UnreachableObjectSize, binary, "B", 1e9), + )) + } + + return S("", sections...) } diff --git a/sizes/sizes.go b/sizes/sizes.go index 73834fd..1bde998 100644 --- a/sizes/sizes.go +++ b/sizes/sizes.go @@ -213,6 +213,14 @@ type HistorySize struct { // The actual size of the .git directory on disk. GitDirSize counts.Count64 `json:"git_dir_size"` + + // The total number of unreachable objects in the repository. + UnreachableObjectCount counts.Count64 `json:"unreachable_object_count"` + + // The total size of unreachable objects in the repository. + UnreachableObjectSize counts.Count64 `json:"unreachable_object_size"` + + ShowUnreachable bool `json:"-"` } // Convenience function: forget `*path` if it is non-nil and overwrite From 470d43ee7be7df3a27806df1a28c6320f3d6301f Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Tue, 15 Apr 2025 18:23:06 -0700 Subject: [PATCH 10/13] Update git/git.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- git/git.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/git/git.go b/git/git.go index fc57bd7..aa49db4 100644 --- a/git/git.go +++ b/git/git.go @@ -189,7 +189,11 @@ type UnreachableStats struct { // batch mode to efficiently retrieve their sizes. func (repo *Repository) GetUnreachableStats() (UnreachableStats, error) { // Run git fsck. Using CombinedOutput captures both stdout and stderr. - cmd := exec.Command(repo.gitBin, "-C", repo.gitDir, "fsck", "--unreachable", "--no-reflogs", "--full") + gitDir, err := repo.GitDir() + if err != nil { + return UnreachableStats{Count: 0, Size: 0}, fmt.Errorf("failed to retrieve Git directory: %w", err) + } + cmd := exec.Command(repo.gitBin, "-C", gitDir, "fsck", "--unreachable", "--no-reflogs", "--full") cmd.Env = os.Environ() output, err := cmd.CombinedOutput() if err != nil { From 13288f3fb28720887ededc2b8f4f0530737003d2 Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Tue, 15 Apr 2025 18:25:12 -0700 Subject: [PATCH 11/13] Fixed int32-int64 error. --- git-sizer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-sizer.go b/git-sizer.go index 3e6d870..45aca24 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -353,7 +353,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st historySize.ShowUnreachable = true unreachableStats, err := repo.GetUnreachableStats() if err == nil { - historySize.UnreachableObjectCount = counts.Count32(unreachableStats.Count) + historySize.UnreachableObjectCount = counts.Count64(unreachableStats.Count) historySize.UnreachableObjectSize = counts.Count64(unreachableStats.Size) } } From e28408943fd2ee0e52749b589b13fbd3c8408c9c Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Tue, 15 Apr 2025 18:46:12 -0700 Subject: [PATCH 12/13] Linter error fixes. --- git/git.go | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/git/git.go b/git/git.go index aa49db4..3ed9b4d 100644 --- a/git/git.go +++ b/git/git.go @@ -189,19 +189,12 @@ type UnreachableStats struct { // batch mode to efficiently retrieve their sizes. func (repo *Repository) GetUnreachableStats() (UnreachableStats, error) { // Run git fsck. Using CombinedOutput captures both stdout and stderr. - gitDir, err := repo.GitDir() + cmd := repo.GitCommand("fsck", "--unreachable", "--no-reflogs", "--full") + output, err := cmd.Output() if err != nil { - return UnreachableStats{Count: 0, Size: 0}, fmt.Errorf("failed to retrieve Git directory: %w", err) - } - cmd := exec.Command(repo.gitBin, "-C", gitDir, "fsck", "--unreachable", "--no-reflogs", "--full") - cmd.Env = os.Environ() - output, err := cmd.CombinedOutput() - if err != nil { - fmt.Fprintln(os.Stderr) - fmt.Fprintln(os.Stderr, "An error occurred trying to process unreachable objects.") - os.Stderr.Write(output) - fmt.Fprintln(os.Stderr) - return UnreachableStats{Count: 0, Size: 0}, err + return UnreachableStats{Count: 0, Size: 0}, fmt.Errorf( + "running 'git fsck --unreachable --no-reflogs --full': %w", err, + ) } var oids []string @@ -229,7 +222,7 @@ func (repo *Repository) GetUnreachableStats() (UnreachableStats, error) { // the provided OIDs. It writes each OID to stdin and reads back lines in the // format: " ". func (repo *Repository) getTotalSizeFromOids(oids []string) (int64, error) { - cmd := exec.Command(repo.gitBin, "-C", repo.gitDir, "cat-file", "--batch-check") + cmd := repo.GitCommand("cat-file", "--batch-check") stdinPipe, err := cmd.StdinPipe() if err != nil { return 0, fmt.Errorf("failed to get stdin pipe: %w", err) @@ -245,9 +238,16 @@ func (repo *Repository) getTotalSizeFromOids(oids []string) (int64, error) { // Write all OIDs to the batch process. go func() { - defer stdinPipe.Close() + defer func() { + if err := stdinPipe.Close(); err != nil { + fmt.Fprintf(os.Stderr, "failed to close stdin pipe: %v\n", err) + } + }() for _, oid := range oids { - io.WriteString(stdinPipe, oid+"\n") + if _, err := io.WriteString(stdinPipe, oid+"\n"); err != nil { + fmt.Fprintf(os.Stderr, "failed to write to stdin pipe: %v\n", err) + return + } } }() @@ -258,7 +258,9 @@ func (repo *Repository) getTotalSizeFromOids(oids []string) (int64, error) { parts := strings.Fields(scanner.Text()) if len(parts) == 3 { var size int64 - fmt.Sscanf(parts[2], "%d", &size) + if _, err := fmt.Sscanf(parts[2], "%d", &size); err != nil { + return 0, fmt.Errorf("failed to parse size from output: %w", err) + } totalSize += size } else { return 0, fmt.Errorf("unexpected output format: %s", scanner.Text()) From 99ec1bfa62e68c023026ea02b3ad2c37f277061c Mon Sep 17 00:00:00 2001 From: Scott Arbeit Date: Wed, 16 Apr 2025 16:43:17 -0700 Subject: [PATCH 13/13] Separated unreachable objects by type in the output. --- README.md | 71 ++++++++++++++++++++------------------- git-sizer.go | 15 +++++++-- git/git.go | 88 +++++++++++++++++++++++++++++++++++++------------ sizes/output.go | 38 +++++++++++++++++---- sizes/sizes.go | 26 ++++++++++++--- 5 files changed, 171 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index ea06895..9f64e51 100644 --- a/README.md +++ b/README.md @@ -89,66 +89,71 @@ Is your Git repository bursting at the seams? ## Usage -By default, `git-sizer` outputs its results in tabular format. For example, let's use it to analyze [the Linux repository](https://github.com/torvalds/linux), using the `--verbose` option so that all statistics are output: +By default, `git-sizer` outputs its results in tabular format. For example, let's use it to analyze [the Linux repository](https://github.com/torvalds/linux) (as of April, 2025), using the `--verbose` option so that all statistics are output: ``` $ git-sizer --verbose -Processing blobs: 1652370 -Processing trees: 3396199 -Processing commits: 722647 -Matching commits to trees: 722647 -Processing annotated tags: 534 -Processing references: 539 +Processing blobs: 2928490 +Processing trees: 6510174 +Processing commits: 1351500 +Matching commits to trees: 1351500 +Processing annotated tags: 877 +Processing references: 883 + | Name | Value | Level of concern | | ---------------------------- | --------- | ------------------------------ | -| Overall repository size | | | +| Repository statistics | | | | * Commits | | | -| * Count | 723 k | * | -| * Total size | 525 MiB | ** | +| * Count | 1.35 M | ** | +| * Total size | 1.11 GiB | **** | | * Trees | | | -| * Count | 3.40 M | ** | -| * Total size | 9.00 GiB | **** | -| * Total tree entries | 264 M | ***** | +| * Count | 6.51 M | **** | +| * Total size | 19.0 GiB | ********** | +| * Total tree entries | 547 M | ********** | | * Blobs | | | -| * Count | 1.65 M | * | -| * Total size | 55.8 GiB | ***** | +| * Count | 2.93 M | * | +| * Uncompressed total size | 115 GiB | ************ | +| * On-disk size | | | +| * Compressed total size | 5.68 GiB | ****** | | * Annotated tags | | | -| * Count | 534 | | +| * Count | 877 | | | * References | | | -| * Count | 539 | | +| * Count | 883 | | +| * Branches | 1 | | +| * Tags | 880 | | +| * Remote-tracking refs | 2 | | | | | | | Biggest objects | | | | * Commits | | | | * Maximum size [1] | 72.7 KiB | * | | * Maximum parents [2] | 66 | ****** | | * Trees | | | -| * Maximum entries [3] | 1.68 k | * | +| * Maximum entries [3] | 2.60 k | ** | | * Blobs | | | -| * Maximum size [4] | 13.5 MiB | * | +| * Maximum size [4] | 22.8 MiB | ** | | | | | | History structure | | | -| * Maximum history depth | 136 k | | +| * Maximum history depth | 198 k | | | * Maximum tag depth [5] | 1 | | | | | | | Biggest checkouts | | | -| * Number of directories [6] | 4.38 k | ** | -| * Maximum path depth [7] | 13 | * | -| * Maximum path length [8] | 134 B | * | -| * Number of files [9] | 62.3 k | * | -| * Total size of files [9] | 747 MiB | | -| * Number of symlinks [10] | 40 | | +| * Number of directories [6] | 5.89 k | ** | +| * Maximum path depth [6] | 14 | * | +| * Maximum path length [7] | 134 B | * | +| * Number of files [8] | 88.7 k | * | +| * Total size of files [9] | 1.41 GiB | * | +| * Number of symlinks [6] | 78 | | | * Number of submodules | 0 | | [1] 91cc53b0c78596a73fa708cceb7313e7168bb146 [2] 2cde51fbd0f310c8a2c5f977e665c0ac3945b46d -[3] 4f86eed5893207aca2c2da86b35b38f2e1ec1fc8 (refs/heads/master:arch/arm/boot/dts) -[4] a02b6794337286bc12c907c33d5d75537c240bd0 (refs/heads/master:drivers/gpu/drm/amd/include/asic_reg/vega10/NBIO/nbio_6_1_sh_mask.h) +[3] ac1d84c335bcbd5fc5d82b8e985d8a9cc4c67d79 (6a1d798feb65d2a67e6e2cafb0b0e4f430603226:arch/arm/boot/dts) +[4] c20bf730dc553e5ae44ad9e769b1f8dface9fa9e (refs/heads/master:drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_sh_mask.h) [5] 5dc01c595e6c6ec9ccda4f6f69c131c0dd945f8c (refs/tags/v2.6.11) -[6] 1459754b9d9acc2ffac8525bed6691e15913c6e2 (589b754df3f37ca0a1f96fccde7f91c59266f38a^{tree}) -[7] 78a269635e76ed927e17d7883f2d90313570fdbc (dae09011115133666e47c35673c0564b0a702db7^{tree}) -[8] ce5f2e31d3bdc1186041fdfd27a5ac96e728f2c5 (refs/heads/master^{tree}) -[9] 532bdadc08402b7a72a4b45a2e02e5c710b7d626 (e9ef1fe312b533592e39cddc1327463c30b0ed8d^{tree}) -[10] f29a5ea76884ac37e1197bef1941f62fda3f7b99 (f5308d1b83eba20e69df5e0926ba7257c8dd9074^{tree}) +[6] 549fc717f82345cf115dfa586ce076a8d1f296a6 (refs/heads/master^{tree}) +[7] b0da5ce619daec8138cf92dfcf00e7a51ce856a9 (d8763340d2cb6262fb86424315a1f92cabc0e23c^{tree}) +[8] fd94fec4e9c4e08df8e919e57fcc974c52c88c3c (3491aa04787f4d7e00da98d94b1b10001c398b5a^{tree}) +[9] 80e16948c5baba02ea2eeda7aa4b2478b68bbaf0 (524c03585fda36584cc7ada49a1827666d37eb4e^{tree}) ``` The output is a table showing the thing that was measured, its numerical value, and a rough indication of which values might be a cause for concern. In all cases, only objects that are reachable from references are included (i.e., not unreachable objects, nor objects that are reachable only from the reflogs). diff --git a/git-sizer.go b/git-sizer.go index 45aca24..57568ab 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -47,7 +47,7 @@ const usage = `usage: git-sizer [OPTS] [ROOT...] gitconfig: 'sizer.jsonVersion'. --[no-]progress report (don't report) progress to stderr. Can be set via gitconfig: 'sizer.progress'. - --include-unreachable include unreachable objects + --include-unreachable include unreachable objects in the analysis --version only report the git-sizer version number Object selection: @@ -353,8 +353,15 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st historySize.ShowUnreachable = true unreachableStats, err := repo.GetUnreachableStats() if err == nil { - historySize.UnreachableObjectCount = counts.Count64(unreachableStats.Count) - historySize.UnreachableObjectSize = counts.Count64(unreachableStats.Size) + // Store per-type unreachable stats for output + historySize.UnreachableBlobsCount = counts.Count64(unreachableStats.Blobs.Count) + historySize.UnreachableBlobsSize = counts.Count64(unreachableStats.Blobs.Size) + historySize.UnreachableTreesCount = counts.Count64(unreachableStats.Trees.Count) + historySize.UnreachableTreesSize = counts.Count64(unreachableStats.Trees.Size) + historySize.UnreachableCommitsCount = counts.Count64(unreachableStats.Commits.Count) + historySize.UnreachableCommitsSize = counts.Count64(unreachableStats.Commits.Size) + historySize.UnreachableTagsCount = counts.Count64(unreachableStats.Tags.Count) + historySize.UnreachableTagsSize = counts.Count64(unreachableStats.Tags.Size) } } @@ -374,6 +381,8 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st } fmt.Fprintf(stdout, "%s\n", j) } else { + // Print a blank line between progress output and the table + fmt.Fprintln(stdout) if _, err := io.WriteString( stdout, historySize.TableString(rg.Groups(), threshold, nameStyle), ); err != nil { diff --git a/git/git.go b/git/git.go index 3ed9b4d..68509cf 100644 --- a/git/git.go +++ b/git/git.go @@ -177,45 +177,91 @@ func (repo *Repository) GitPath(relPath string) (string, error) { return string(bytes.TrimSpace(out)), nil } -// UnreachableStats holds the count and size of unreachable objects. +// UnreachableStats holds the count and size of unreachable objects, broken out by type. type UnreachableStats struct { - Count int64 - Size int64 + Blobs struct { + Count int64 + Size int64 + } + Trees struct { + Count int64 + Size int64 + } + Commits struct { + Count int64 + Size int64 + } + Tags struct { + Count int64 + Size int64 + } } -// GetUnreachableStats runs 'git fsck --unreachable --no-reflogs --full' -// and returns the count and total size of unreachable objects. -// This implementation collects all OIDs from fsck output and then uses -// batch mode to efficiently retrieve their sizes. +// GetUnreachableStats runs 'git fsck --unreachable --no-reflogs' +// and returns the count and total size of unreachable objects, broken out by type. func (repo *Repository) GetUnreachableStats() (UnreachableStats, error) { - // Run git fsck. Using CombinedOutput captures both stdout and stderr. - cmd := repo.GitCommand("fsck", "--unreachable", "--no-reflogs", "--full") + cmd := repo.GitCommand("fsck", "--unreachable", "--no-reflogs") output, err := cmd.Output() if err != nil { - return UnreachableStats{Count: 0, Size: 0}, fmt.Errorf( - "running 'git fsck --unreachable --no-reflogs --full': %w", err, + return UnreachableStats{}, fmt.Errorf( + "running 'git fsck --unreachable --no-reflogs': %w", err, ) } - var oids []string - count := int64(0) + // Collect OIDs by type + + oidsByType := map[string][]string{ + "blob": {}, + "tree": {}, + "commit": {}, + "tag": {}, + } + countsByType := map[string]int64{ + "blob": 0, + "tree": 0, + "commit": 0, + "tag": 0, + } + for _, line := range bytes.Split(output, []byte{'\n'}) { fields := bytes.Fields(line) // Expected line format: "unreachable ..." if len(fields) >= 3 && string(fields[0]) == "unreachable" { - count++ - oid := string(fields[2]) - oids = append(oids, oid) + typeStr := string(fields[1]) + if _, ok := oidsByType[typeStr]; ok { + oid := string(fields[2]) + oidsByType[typeStr] = append(oidsByType[typeStr], oid) + countsByType[typeStr]++ + } } } - // Retrieve the total size using batch mode. - totalSize, err := repo.getTotalSizeFromOids(oids) - if err != nil { - return UnreachableStats{}, fmt.Errorf("failed to get sizes via batch mode: %w", err) + var stats UnreachableStats + var errBlob, errTree, errCommit, errTag error + stats.Blobs.Count = countsByType["blob"] + stats.Trees.Count = countsByType["tree"] + stats.Commits.Count = countsByType["commit"] + stats.Tags.Count = countsByType["tag"] + + stats.Blobs.Size, errBlob = repo.getTotalSizeFromOids(oidsByType["blob"]) + stats.Trees.Size, errTree = repo.getTotalSizeFromOids(oidsByType["tree"]) + stats.Commits.Size, errCommit = repo.getTotalSizeFromOids(oidsByType["commit"]) + stats.Tags.Size, errTag = repo.getTotalSizeFromOids(oidsByType["tag"]) + + if errBlob != nil { + return stats, fmt.Errorf("failed to get blob sizes: %w", errBlob) + } + if errTree != nil { + return stats, fmt.Errorf("failed to get tree sizes: %w", errTree) + } + if errCommit != nil { + return stats, fmt.Errorf("failed to get commit sizes: %w", errCommit) + } + if errTag != nil { + return stats, fmt.Errorf("failed to get tag sizes: %w", errTag) } - return UnreachableStats{Count: count, Size: totalSize}, nil + return stats, nil } // getTotalSizeFromOids uses 'git cat-file --batch-check' to retrieve sizes for diff --git a/sizes/output.go b/sizes/output.go index 041bbbe..74c8fef 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -613,12 +613,38 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents { if s.ShowUnreachable { sections = append(sections, S( "Unreachable objects", - I("unreachableObjectCount", "Count", - "The total number of unreachable objects in the repository", - nil, s.UnreachableObjectCount, metric, "", 1e7), - I("unreachableObjectSize", "Uncompressed total size", - "The total size of unreachable objects in the repository", - nil, s.UnreachableObjectSize, binary, "B", 1e9), + S("Blobs", + I("unreachableBlobsCount", "Count", + "The total number of unreachable blobs in the repository", + nil, s.UnreachableBlobsCount, metric, "", 1.5e6), + I("unreachableBlobsSize", "Uncompressed total size", + "The total size of unreachable blobs in the repository", + nil, s.UnreachableBlobsSize, binary, "B", 1e9), + ), + S("Trees", + I("unreachableTreesCount", "Count", + "The total number of unreachable trees in the repository", + nil, s.UnreachableTreesCount, metric, "", 1.5e6), + I("unreachableTreesSize", "Total size", + "The total size of unreachable trees in the repository", + nil, s.UnreachableTreesSize, binary, "B", 2e9), + ), + S("Commits", + I("unreachableCommitsCount", "Count", + "The total number of unreachable commits in the repository", + nil, s.UnreachableCommitsCount, metric, "", 500e3), + I("unreachableCommitsSize", "Total size", + "The total size of unreachable commits in the repository", + nil, s.UnreachableCommitsSize, binary, "B", 250e6), + ), + S("Tags", + I("unreachableTagsCount", "Count", + "The total number of unreachable tags in the repository", + nil, s.UnreachableTagsCount, metric, "", 25e3), + I("unreachableTagsSize", "Total size", + "The total size of unreachable tags in the repository", + nil, s.UnreachableTagsSize, binary, "B", 250e6), + ), )) } diff --git a/sizes/sizes.go b/sizes/sizes.go index 1bde998..2a86b68 100644 --- a/sizes/sizes.go +++ b/sizes/sizes.go @@ -214,11 +214,29 @@ type HistorySize struct { // The actual size of the .git directory on disk. GitDirSize counts.Count64 `json:"git_dir_size"` - // The total number of unreachable objects in the repository. - UnreachableObjectCount counts.Count64 `json:"unreachable_object_count"` + // The total number of unreachable blobs in the repository. + UnreachableBlobsCount counts.Count64 `json:"unreachable_blobs_count"` - // The total size of unreachable objects in the repository. - UnreachableObjectSize counts.Count64 `json:"unreachable_object_size"` + // The total size of unreachable blobs in the repository. + UnreachableBlobsSize counts.Count64 `json:"unreachable_blobs_size"` + + // The total number of unreachable trees in the repository. + UnreachableTreesCount counts.Count64 `json:"unreachable_trees_count"` + + // The total size of unreachable trees in the repository. + UnreachableTreesSize counts.Count64 `json:"unreachable_trees_size"` + + // The total number of unreachable commits in the repository. + UnreachableCommitsCount counts.Count64 `json:"unreachable_commits_count"` + + // The total size of unreachable commits in the repository. + UnreachableCommitsSize counts.Count64 `json:"unreachable_commits_size"` + + // The total number of unreachable tags in the repository. + UnreachableTagsCount counts.Count64 `json:"unreachable_tags_count"` + + // The total size of unreachable tags in the repository. + UnreachableTagsSize counts.Count64 `json:"unreachable_tags_size"` ShowUnreachable bool `json:"-"` }