Skip to content

feat(docker): implement exclude paths functionality #4057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
8 changes: 5 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,10 @@ var (
circleCiScan = cli.Command("circleci", "Scan CircleCI")
circleCiScanToken = circleCiScan.Flag("token", "CircleCI token. Can also be provided with environment variable").Envar("CIRCLECI_TOKEN").Required().String()

dockerScan = cli.Command("docker", "Scan Docker Image")
dockerScanImages = dockerScan.Flag("image", "Docker image to scan. Use the file:// prefix to point to a local tarball, otherwise a image registry is assumed.").Required().Strings()
dockerScanToken = dockerScan.Flag("token", "Docker bearer token. Can also be provided with environment variable").Envar("DOCKER_TOKEN").String()
dockerScan = cli.Command("docker", "Scan Docker Image")
dockerScanImages = dockerScan.Flag("image", "Docker image to scan. Use the file:// prefix to point to a local tarball, otherwise a image registry is assumed.").Required().Strings()
dockerScanToken = dockerScan.Flag("token", "Docker bearer token. Can also be provided with environment variable").Envar("DOCKER_TOKEN").String()
dockerExcludePaths = dockerScan.Flag("exclude-paths", "Comma separated list of paths to exclude from scan").String()

travisCiScan = cli.Command("travisci", "Scan TravisCI")
travisCiScanToken = travisCiScan.Flag("token", "TravisCI token. Can also be provided with environment variable").Envar("TRAVISCI_TOKEN").Required().String()
Expand Down Expand Up @@ -835,6 +836,7 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
BearerToken: *dockerScanToken,
Images: *dockerScanImages,
UseDockerKeychain: *dockerScanToken == "",
ExcludePaths: strings.Split(*dockerExcludePaths, ","),
}
if ref, err = eng.ScanDocker(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan Docker: %v", err)
Expand Down
5 changes: 4 additions & 1 deletion pkg/engine/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ import (

// ScanDocker scans a given docker connection.
func (e *Engine) ScanDocker(ctx context.Context, c sources.DockerConfig) (sources.JobProgressRef, error) {
connection := &sourcespb.Docker{Images: c.Images}
connection := &sourcespb.Docker{
Images: c.Images,
ExcludePaths: c.ExcludePaths,
}

switch {
case c.UseDockerKeychain:
Expand Down
1,225 changes: 618 additions & 607 deletions pkg/pb/sourcespb/sources.pb.go

Large diffs are not rendered by default.

39 changes: 33 additions & 6 deletions pkg/sources/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"io"
"net"
"net/http"
"regexp"
"strings"
"time"

Expand All @@ -30,12 +31,13 @@ import (
const SourceType = sourcespb.SourceType_SOURCE_TYPE_DOCKER

type Source struct {
name string
sourceId sources.SourceID
jobId sources.JobID
verify bool
concurrency int
conn sourcespb.Docker
name string
sourceId sources.SourceID
jobId sources.JobID
verify bool
concurrency int
conn sourcespb.Docker
excludePaths []string
sources.Progress
sources.CommonSourceUnitUnmarshaller
}
Expand Down Expand Up @@ -74,6 +76,11 @@ func (s *Source) Init(_ context.Context, name string, jobId sources.JobID, sourc
return fmt.Errorf("error unmarshalling connection: %w", err)
}

// Extract exclude paths from connection
if paths := s.conn.GetExcludePaths(); len(paths) > 0 {
s.excludePaths = paths
}

return nil
}

Expand Down Expand Up @@ -349,6 +356,26 @@ func (s *Source) processChunk(ctx context.Context, info chunkProcessingInfo, chu
return nil
}

// Check if the file should be excluded
filePath := "/" + info.name
ctx.Logger().V(2).Info("checking file against exclude paths", "file", filePath, "exclude_paths", s.excludePaths)
for _, excludePath := range s.excludePaths {
// Convert wildcard pattern to regex
pattern := strings.ReplaceAll(excludePath, "*", ".*")
pattern = "^" + pattern + "$"

matched, err := regexp.MatchString(pattern, filePath)
if err != nil {
ctx.Logger().Error(err, "error matching exclude pattern", "pattern", pattern, "file", filePath)
continue
}

if matched {
ctx.Logger().V(2).Info("skipping file: matches exclude path", "file", filePath, "exclude_path", excludePath)
return nil
}
}

chunkReader := sources.NewChunkReader()
chunkResChan := chunkReader(ctx, info.reader)

Expand Down
87 changes: 87 additions & 0 deletions pkg/sources/docker/docker_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package docker

import (
"regexp"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -166,3 +167,89 @@ func isHistoryChunk(t *testing.T, chunk *sources.Chunk) bool {
return metadata != nil &&
strings.HasPrefix(metadata.File, "image-metadata:history:")
}

func TestDockerExcludeExactPath(t *testing.T) {
dockerConn := &sourcespb.Docker{
Credential: &sourcespb.Docker_Unauthenticated{
Unauthenticated: &credentialspb.Unauthenticated{},
},
Images: []string{"test-image"},
ExcludePaths: []string{"/var/log/test"},
}

conn := &anypb.Any{}
err := conn.MarshalFrom(dockerConn)
assert.NoError(t, err)

s := &Source{}
err = s.Init(context.TODO(), "test source", 0, 0, false, conn, 1)
assert.NoError(t, err)

// Create test data
testFiles := []struct {
path string
excluded bool
}{
{"/var/log/test", true}, // Should be excluded (exact match)
{"/var/log/test2", false}, // Should not be excluded (different path)
{"/var/log/test/sub", false}, // Should not be excluded (subdirectory)
{"/var/log/other", false}, // Should not be excluded (different path)
}

for _, tf := range testFiles {
excluded := false
for _, excludePath := range s.excludePaths {
if tf.path == excludePath {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're only checking for exact matches here, which means that you're not actually exercising the match logic you've added to the source. This test is effectively adding a single item to a slice and then asserting that a few other items are not in the slice. You don't need a whole source implementation to test that! I think you can just remove this test entirely.

excluded = true
break
}
}
assert.Equal(t, tf.excluded, excluded, "Unexpected exclusion result for path: %s", tf.path)
}
}

func TestDockerExcludeWildcardPath(t *testing.T) {
dockerConn := &sourcespb.Docker{
Credential: &sourcespb.Docker_Unauthenticated{
Unauthenticated: &credentialspb.Unauthenticated{},
},
Images: []string{"test-image"},
ExcludePaths: []string{"/var/log/test/*"},
}

conn := &anypb.Any{}
err := conn.MarshalFrom(dockerConn)
assert.NoError(t, err)

s := &Source{}
err = s.Init(context.TODO(), "test source", 0, 0, false, conn, 1)
assert.NoError(t, err)

// Create test data
testFiles := []struct {
path string
excluded bool
}{
{"/var/log/test/file1", true}, // Should be excluded (direct child)
{"/var/log/test/sub/file2", true}, // Should be excluded (nested child)
{"/var/log/test", false}, // Should not be excluded (parent dir)
{"/var/log/test2/file", false}, // Should not be excluded (different dir)
{"/var/log/other/file", false}, // Should not be excluded (unrelated)
}

for _, tf := range testFiles {
excluded := false
for _, excludePath := range s.excludePaths {
// Convert wildcard pattern to regex
pattern := strings.ReplaceAll(excludePath, "*", ".*")
pattern = "^" + pattern + "$"
isMatch, err := regexp.MatchString(pattern, tf.path)
assert.NoError(t, err)
if isMatch {
excluded = true
break
}
}
assert.Equal(t, tf.excluded, excluded, "Unexpected exclusion result for path: %s", tf.path)
}
}
2 changes: 2 additions & 0 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ type DockerConfig struct {
BearerToken string
// UseDockerKeychain determines whether to use the Docker keychain.
UseDockerKeychain bool
// ExcludePaths is a list of paths to exclude from scanning.
ExcludePaths []string
}

// GCSConfig defines the optional configuration for a GCS source.
Expand Down
1 change: 1 addition & 0 deletions proto/sources.proto
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ message Docker {
bool docker_keychain = 4;
}
repeated string images = 5;
repeated string exclude_paths = 6;
}

message ECR {
Expand Down
Loading