Skip to content

feat(docker): implement exclude paths functionality #4057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
8 changes: 5 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,10 @@ var (
circleCiScan = cli.Command("circleci", "Scan CircleCI")
circleCiScanToken = circleCiScan.Flag("token", "CircleCI token. Can also be provided with environment variable").Envar("CIRCLECI_TOKEN").Required().String()

dockerScan = cli.Command("docker", "Scan Docker Image")
dockerScanImages = dockerScan.Flag("image", "Docker image to scan. Use the file:// prefix to point to a local tarball, otherwise a image registry is assumed.").Required().Strings()
dockerScanToken = dockerScan.Flag("token", "Docker bearer token. Can also be provided with environment variable").Envar("DOCKER_TOKEN").String()
dockerScan = cli.Command("docker", "Scan Docker Image")
dockerScanImages = dockerScan.Flag("image", "Docker image to scan. Use the file:// prefix to point to a local tarball, otherwise a image registry is assumed.").Required().Strings()
dockerScanToken = dockerScan.Flag("token", "Docker bearer token. Can also be provided with environment variable").Envar("DOCKER_TOKEN").String()
dockerExcludePaths = dockerScan.Flag("exclude-paths", "Comma separated list of paths to exclude from scan").String()

travisCiScan = cli.Command("travisci", "Scan TravisCI")
travisCiScanToken = travisCiScan.Flag("token", "TravisCI token. Can also be provided with environment variable").Envar("TRAVISCI_TOKEN").Required().String()
Expand Down Expand Up @@ -835,6 +836,7 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
BearerToken: *dockerScanToken,
Images: *dockerScanImages,
UseDockerKeychain: *dockerScanToken == "",
ExcludePaths: strings.Split(*dockerExcludePaths, ","),
}
if ref, err = eng.ScanDocker(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan Docker: %v", err)
Expand Down
5 changes: 4 additions & 1 deletion pkg/engine/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ import (

// ScanDocker scans a given docker connection.
func (e *Engine) ScanDocker(ctx context.Context, c sources.DockerConfig) (sources.JobProgressRef, error) {
connection := &sourcespb.Docker{Images: c.Images}
connection := &sourcespb.Docker{
Images: c.Images,
ExcludePaths: c.ExcludePaths,
}

switch {
case c.UseDockerKeychain:
Expand Down
1,225 changes: 618 additions & 607 deletions pkg/pb/sourcespb/sources.pb.go

Large diffs are not rendered by default.

47 changes: 41 additions & 6 deletions pkg/sources/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"io"
"net"
"net/http"
"regexp"
"strings"
"time"

Expand All @@ -30,12 +31,14 @@ import (
const SourceType = sourcespb.SourceType_SOURCE_TYPE_DOCKER

type Source struct {
name string
sourceId sources.SourceID
jobId sources.JobID
verify bool
concurrency int
conn sourcespb.Docker
name string
sourceId sources.SourceID
jobId sources.JobID
verify bool
concurrency int
conn sourcespb.Docker
excludePaths []string
excludeRegexes []*regexp.Regexp
sources.Progress
sources.CommonSourceUnitUnmarshaller
}
Expand Down Expand Up @@ -74,6 +77,22 @@ func (s *Source) Init(_ context.Context, name string, jobId sources.JobID, sourc
return fmt.Errorf("error unmarshalling connection: %w", err)
}

// Extract exclude paths from connection and compile regexes
if paths := s.conn.GetExcludePaths(); len(paths) > 0 {
s.excludePaths = paths
s.excludeRegexes = make([]*regexp.Regexp, len(paths))
for i, path := range paths {
// Convert wildcard pattern to regex
pattern := strings.ReplaceAll(path, "*", ".*")
pattern = "^" + pattern + "$"
regex, err := regexp.Compile(pattern)
if err != nil {
return fmt.Errorf("error compiling exclude path regex for %q: %w", path, err)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might also consider adding the pattern being compiled to this error message; if the error is caused by something added during pattern generation, users are likely to be quite confused.

}
s.excludeRegexes[i] = regex
}
}

return nil
}

Expand Down Expand Up @@ -349,6 +368,22 @@ func (s *Source) processChunk(ctx context.Context, info chunkProcessingInfo, chu
return nil
}

// Check if the file should be excluded
filePath := "/" + info.name
ctx.Logger().V(2).Info("checking file against exclude paths", "file", filePath, "exclude_paths", s.excludePaths)
for i, excludePath := range s.excludePaths {
// Check for exact path match first (no regex needed)
if filePath == excludePath {
ctx.Logger().V(2).Info("skipping file: matches exclude path exactly", "file", filePath, "exclude_path", excludePath)
return nil
}
// Then check against pre-compiled regex
if s.excludeRegexes[i].MatchString(filePath) {
ctx.Logger().V(2).Info("skipping file: matches exclude pattern", "file", filePath, "exclude_path", excludePath)
return nil
}
}

chunkReader := sources.NewChunkReader()
chunkResChan := chunkReader(ctx, info.reader)

Expand Down
214 changes: 214 additions & 0 deletions pkg/sources/docker/docker_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package docker

import (
"regexp"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -166,3 +167,216 @@ func isHistoryChunk(t *testing.T, chunk *sources.Chunk) bool {
return metadata != nil &&
strings.HasPrefix(metadata.File, "image-metadata:history:")
}

func TestDockerExcludeExactPath(t *testing.T) {
dockerConn := &sourcespb.Docker{
Credential: &sourcespb.Docker_Unauthenticated{
Unauthenticated: &credentialspb.Unauthenticated{},
},
Images: []string{"test-image"},
ExcludePaths: []string{"/var/log/test"},
}

conn := &anypb.Any{}
err := conn.MarshalFrom(dockerConn)
assert.NoError(t, err)

s := &Source{}
err = s.Init(context.TODO(), "test source", 0, 0, false, conn, 1)
assert.NoError(t, err)

// Create test data
testFiles := []struct {
path string
excluded bool
}{
{"/var/log/test", true}, // Should be excluded (exact match)
{"/var/log/test2", false}, // Should not be excluded (different path)
{"/var/log/test/sub", false}, // Should not be excluded (subdirectory)
{"/var/log/other", false}, // Should not be excluded (different path)
}

for _, tf := range testFiles {
excluded := false
for _, excludePath := range s.excludePaths {
if tf.path == excludePath {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're only checking for exact matches here, which means that you're not actually exercising the match logic you've added to the source. This test is effectively adding a single item to a slice and then asserting that a few other items are not in the slice. You don't need a whole source implementation to test that! I think you can just remove this test entirely.

excluded = true
break
}
}
assert.Equal(t, tf.excluded, excluded, "Unexpected exclusion result for path: %s", tf.path)
}
}

func TestDockerExcludeWildcardPath(t *testing.T) {
dockerConn := &sourcespb.Docker{
Credential: &sourcespb.Docker_Unauthenticated{
Unauthenticated: &credentialspb.Unauthenticated{},
},
Images: []string{"test-image"},
ExcludePaths: []string{"/var/log/test/*"},
}

conn := &anypb.Any{}
err := conn.MarshalFrom(dockerConn)
assert.NoError(t, err)

s := &Source{}
err = s.Init(context.TODO(), "test source", 0, 0, false, conn, 1)
assert.NoError(t, err)

// Create test data
testFiles := []struct {
path string
excluded bool
}{
{"/var/log/test/file1", true}, // Should be excluded (direct child)
{"/var/log/test/sub/file2", true}, // Should be excluded (nested child)
{"/var/log/test", false}, // Should not be excluded (parent dir)
{"/var/log/test2/file", false}, // Should not be excluded (different dir)
{"/var/log/other/file", false}, // Should not be excluded (unrelated)
}

for _, tf := range testFiles {
excluded := false
for _, excludePath := range s.excludePaths {
// Convert wildcard pattern to regex
pattern := strings.ReplaceAll(excludePath, "*", ".*")
pattern = "^" + pattern + "$"
isMatch, err := regexp.MatchString(pattern, tf.path)
assert.NoError(t, err)
if isMatch {
excluded = true
break
}
}
assert.Equal(t, tf.excluded, excluded, "Unexpected exclusion result for path: %s", tf.path)
}
}

func TestShouldExclude(t *testing.T) {
tests := []struct {
name string
excludePaths []string
testPath string
want bool
}{
{
name: "exact match should exclude",
excludePaths: []string{"/var/log/test"},
testPath: "/var/log/test",
want: true,
},
{
name: "non-matching path should not exclude",
excludePaths: []string{"/var/log/test"},
testPath: "/var/log/other",
want: false,
},
{
name: "wildcard should match children",
excludePaths: []string{"/var/log/*"},
testPath: "/var/log/test",
want: true,
},
{
name: "wildcard should not match parent",
excludePaths: []string{"/var/log/*/deep"},
testPath: "/var/log",
want: false,
},
{
name: "multiple patterns should work",
excludePaths: []string{"/var/log/*", "/etc/nginx/*", "/tmp/test"},
testPath: "/etc/nginx/conf.d/default.conf",
want: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dockerConn := &sourcespb.Docker{
Credential: &sourcespb.Docker_Unauthenticated{
Unauthenticated: &credentialspb.Unauthenticated{},
},
ExcludePaths: tt.excludePaths,
}

conn := &anypb.Any{}
err := conn.MarshalFrom(dockerConn)
assert.NoError(t, err)

s := &Source{}
err = s.Init(context.TODO(), "test", 0, 0, false, conn, 1)
assert.NoError(t, err)

// Test if the path should be excluded
excluded := false
for _, path := range tt.excludePaths {
if tt.testPath == path {
excluded = true
break
}
// Convert wildcard pattern to regex
pattern := strings.ReplaceAll(path, "*", ".*")
pattern = "^" + pattern + "$"
isMatch, err := regexp.MatchString(pattern, tt.testPath)
assert.NoError(t, err)
if isMatch {
excluded = true
break
}
}
assert.Equal(t, tt.want, excluded)
Comment on lines +313 to +330
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love the idea of this new test! But notice how s isn't involved in any of this test code. That means that it's not actually testing the source at all! This is why I suggested creating a method on Source that actually checks for exclusion. If you do that, you can use it in the source and also test it here.

})
}
}

func TestDockerScanWithExclusions(t *testing.T) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is great!

dockerConn := &sourcespb.Docker{
Credential: &sourcespb.Docker_Unauthenticated{
Unauthenticated: &credentialspb.Unauthenticated{},
},
Images: []string{"trufflesecurity/secrets@sha256:864f6d41209462d8e37fc302ba1532656e265f7c361f11e29fed6ca1f4208e11"},
ExcludePaths: []string{"/aws"}, // This path exists in the test image
}

conn := &anypb.Any{}
err := conn.MarshalFrom(dockerConn)
assert.NoError(t, err)

s := &Source{}
err = s.Init(context.TODO(), "test source", 0, 0, false, conn, 1)
assert.NoError(t, err)

var wg sync.WaitGroup
chunksChan := make(chan *sources.Chunk, 1)
foundExcludedPath := false

wg.Add(1)
go func() {
defer wg.Done()
for chunk := range chunksChan {
// Skip history chunks
if isHistoryChunk(t, chunk) {
continue
}

metadata := chunk.SourceMetadata.GetDocker()
assert.NotNil(t, metadata)

// Check if we found a chunk with the excluded path
if metadata.File == "/aws" {
foundExcludedPath = true
}
}
}()

err = s.Chunks(context.TODO(), chunksChan)
assert.NoError(t, err)

close(chunksChan)
wg.Wait()

assert.False(t, foundExcludedPath, "Found a chunk that should have been excluded")
}
2 changes: 2 additions & 0 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ type DockerConfig struct {
BearerToken string
// UseDockerKeychain determines whether to use the Docker keychain.
UseDockerKeychain bool
// ExcludePaths is a list of paths to exclude from scanning.
ExcludePaths []string
}

// GCSConfig defines the optional configuration for a GCS source.
Expand Down
1 change: 1 addition & 0 deletions proto/sources.proto
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ message Docker {
bool docker_keychain = 4;
}
repeated string images = 5;
repeated string exclude_paths = 6;
}

message ECR {
Expand Down
Loading