diff --git a/cmd/syft/internal/test/integration/catalog_packages_test.go b/cmd/syft/internal/test/integration/catalog_packages_test.go index ec7940e00..a949090ee 100644 --- a/cmd/syft/internal/test/integration/catalog_packages_test.go +++ b/cmd/syft/internal/test/integration/catalog_packages_test.go @@ -12,6 +12,7 @@ import ( "github.com/anchore/stereoscope/pkg/imagetest" "github.com/anchore/syft/syft" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" ) @@ -93,40 +94,7 @@ func TestPkgCoverageImage(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { - pkgCount := 0 - - for a := range sbom.Artifacts.Packages.Enumerate(c.pkgType) { - if a.Language.String() != "" { - observedLanguages.Add(a.Language.String()) - } - - observedPkgs.Add(string(a.Type)) - expectedVersion, ok := c.pkgInfo[a.Name] - if !ok { - t.Errorf("unexpected package found: %s", a.Name) - } - - if expectedVersion != a.Version { - t.Errorf("unexpected package version (pkg=%s): %s, expected: %s", a.Name, a.Version, expectedVersion) - } - - if a.Language != c.pkgLanguage { - t.Errorf("bad language (pkg=%+v): %+v", a.Name, a.Language) - } - - if a.Type != c.pkgType { - t.Errorf("bad package type (pkg=%+v): %+v", a.Name, a.Type) - } - pkgCount++ - } - - if pkgCount != len(c.pkgInfo)+c.duplicates { - t.Logf("Discovered packages of type %+v", c.pkgType) - for a := range sbom.Artifacts.Packages.Enumerate(c.pkgType) { - t.Log(" ", a) - } - t.Fatalf("unexpected package count: %d!=%d", pkgCount, len(c.pkgInfo)) - } + assertPackages(t, sbom, c, observedLanguages, observedPkgs) }) } @@ -176,44 +144,7 @@ func TestPkgCoverageDirectory(t *testing.T) { for _, test := range cases { t.Run(test.name, func(t *testing.T) { - actualPkgCount := 0 - - for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) { - observedLanguages.Add(actualPkg.Language.String()) - observedPkgs.Add(string(actualPkg.Type)) - - expectedVersion, ok := 
test.pkgInfo[actualPkg.Name] - if !ok { - t.Errorf("unexpected package found: %s", actualPkg.Name) - } - - if expectedVersion != actualPkg.Version { - t.Errorf("unexpected package version (pkg=%s): %s", actualPkg.Name, actualPkg.Version) - } - - var foundLang bool - for _, lang := range strings.Split(test.pkgLanguage.String(), ",") { - if actualPkg.Language.String() == lang { - foundLang = true - break - } - } - if !foundLang { - t.Errorf("bad language (pkg=%+v): %+v", actualPkg.Name, actualPkg.Language) - } - - if actualPkg.Type != test.pkgType { - t.Errorf("bad package type (pkg=%+v): %+v", actualPkg.Name, actualPkg.Type) - } - actualPkgCount++ - } - - if actualPkgCount != len(test.pkgInfo)+test.duplicates { - for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) { - t.Log(" ", actualPkg) - } - t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo)) - } + assertPackages(t, sbom, test, observedLanguages, observedPkgs) }) } @@ -243,6 +174,64 @@ func TestPkgCoverageDirectory(t *testing.T) { } } +// assertPackages checks every cataloged package of test.pkgType against the test case: the name must be known, the version, +// language (any of the comma-separated test.pkgLanguage values) and type must match, and every location must carry an evidence +// annotation with at least one being primary. It fails the test when the package count differs from len(test.pkgInfo)+test.duplicates. +func assertPackages(t *testing.T, sbom sbom.SBOM, test testCase, observedLanguages *strset.Set, observedPkgs *strset.Set) { + actualPkgCount := 0 + + for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) { + observedLanguages.Add(actualPkg.Language.String()) + observedPkgs.Add(string(actualPkg.Type)) + + expectedVersion, ok := test.pkgInfo[actualPkg.Name] + if !ok { + t.Errorf("unexpected package found: %s", actualPkg.Name) + } + + if expectedVersion != actualPkg.Version { + t.Errorf("unexpected package version (pkg=%s): %s, expected: %s", actualPkg.Name, actualPkg.Version, expectedVersion) + } + + var foundLang bool + for _, lang := range strings.Split(test.pkgLanguage.String(), ",") { + if actualPkg.Language.String() == lang { + foundLang = true + break + } + } + if !foundLang { + t.Errorf("bad language (pkg=%+v): %+v", actualPkg.Name, actualPkg.Language) + } + + if actualPkg.Type != test.pkgType { + t.Errorf("bad package type (pkg=%+v): %+v", 
actualPkg.Name, actualPkg.Type) + } + actualPkgCount++ + + // all packages should have at least one location associated with it, and of those locations at least one should be primary evidence + locs := actualPkg.Locations.ToSlice() + assert.NotEmpty(t, locs, "package %q has no locations (type=%q)", actualPkg.Name, actualPkg.Type) + var primaryEvidenceFound bool + for _, l := range locs { + if _, exists := l.Annotations[pkg.EvidenceAnnotationKey]; !exists { + t.Errorf("missing evidence annotation (pkg=%s type=%s)", actualPkg.Name, actualPkg.Type) + } + if l.Annotations[pkg.EvidenceAnnotationKey] == pkg.PrimaryEvidenceAnnotation { + primaryEvidenceFound = true + } + } + assert.True(t, primaryEvidenceFound, "no primary evidence found for package %q", actualPkg.Name) + } + + if actualPkgCount != len(test.pkgInfo)+test.duplicates { + for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) { + t.Log(" ", actualPkg) + } + t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo)+test.duplicates) + } +} + func TestPkgCoverageImage_HasEvidence(t *testing.T) { sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope) diff --git a/internal/task/scope_tasks.go b/internal/task/scope_tasks.go new file mode 100644 index 000000000..8127829ab --- /dev/null +++ b/internal/task/scope_tasks.go @@ -0,0 +1,63 @@ +package task + +import ( + "context" + "fmt" + + "github.com/anchore/syft/internal/sbomsync" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +func NewDeepSquashedScopeCleanupTask() Task { + fn := func(_ context.Context, _ file.Resolver, builder sbomsync.Builder) error { + accessor := builder.(sbomsync.Accessor) + + // remove all packages that don't exist in the final state of the image + builder.DeletePackages(packagesToRemove(accessor)...) 
+ return nil + } + + return NewTask("deep-squashed-cleaner", fn) +} + +// packagesToRemove returns the IDs of the packages that have neither a visible location that is primary evidence (any visible location +// is enough for binary packages) nor a primary-evidence location without a visibility annotation whose key was not already seen. +func packagesToRemove(accessor sbomsync.Accessor) []artifact.ID { + pkgsToDelete := make([]artifact.ID, 0) + accessor.ReadFromSBOM(func(s *sbom.SBOM) { + filterDuplicates := make(map[string]bool) + for p := range s.Artifacts.Packages.Enumerate() { + noSquashed := true + noPrimary := true + for _, l := range p.Locations.ToSlice() { + isPrimaryEvidence := l.Annotations[pkg.EvidenceAnnotationKey] == pkg.PrimaryEvidenceAnnotation + switch l.Annotations[file.VisibleAnnotationKey] { + case file.VisibleAnnotation: + if isPrimaryEvidence || p.Type == pkg.BinaryPkg { + noSquashed = false + } + case "": + if isPrimaryEvidence { + if key := getKey(p, l); !filterDuplicates[key] { + filterDuplicates[key] = true + noPrimary = false + } + } + } + } + + if noSquashed && noPrimary { + pkgsToDelete = append(pkgsToDelete, p.ID()) + } + } + }) + return pkgsToDelete +} + +// getKey builds the de-duplication key for a package at a location from the package name and version and the location's real and access paths. +func getKey(p pkg.Package, loc file.Location) string { + return fmt.Sprintf("%s-%s-%s-%s", p.Name, p.Version, loc.RealPath, loc.AccessPath) +} diff --git a/syft/create_sbom_config.go b/syft/create_sbom_config.go index fcbcd5f6d..c189da4aa 100644 --- a/syft/create_sbom_config.go +++ b/syft/create_sbom_config.go @@ -188,6 +188,7 @@ func (c *CreateSBOMConfig) makeTaskGroups(src source.Description) ([][]task.Task // generate package and file tasks based on the configuration environmentTasks := c.environmentTasks() + scopeTasks := c.scopeTasks() relationshipsTasks := c.relationshipTasks(src) unknownTasks := c.unknownsTasks() @@ -204,6 +205,11 @@ func (c *CreateSBOMConfig) makeTaskGroups(src source.Description) ([][]task.Task taskGroups = append(taskGroups, append(pkgTasks, fileTasks...)) } + // all scope work must be done after all nodes (files and packages) have been cataloged and before the relationship tasks run + if len(scopeTasks) > 0 { + taskGroups = append(taskGroups, scopeTasks) + } + // all relationship work 
must be done after all nodes (files and packages) have been cataloged if len(relationshipsTasks) > 0 { taskGroups = append(taskGroups, relationshipsTasks) @@ -391,6 +397,17 @@ func (c *CreateSBOMConfig) userPackageTasks(cfg task.CatalogingFactoryConfig) ([ return persistentPackageTasks, selectablePackageTasks, nil } +// scopeTasks returns the set of tasks that should be run to generate additional scope information +func (c *CreateSBOMConfig) scopeTasks() []task.Task { + var tsks []task.Task + if c.Search.Scope == source.DeepSquashedScope { + if t := task.NewDeepSquashedScopeCleanupTask(); t != nil { + tsks = append(tsks, t) + } + } + return tsks +} + // relationshipTasks returns the set of tasks that should be run to generate additional relationships as well as // prune existing relationships. func (c *CreateSBOMConfig) relationshipTasks(src source.Description) []task.Task { diff --git a/syft/file/location.go b/syft/file/location.go index 8a0fe6f38..a70fe5d72 100644 --- a/syft/file/location.go +++ b/syft/file/location.go @@ -9,6 +9,17 @@ import ( "github.com/anchore/stereoscope/pkg/image" ) +const ( + // VisibleAnnotationKey is the key used to indicate if the location is visible or not at runtime + VisibleAnnotationKey = "visible" + + // HiddenAnnotation is the value used to indicate that the location is not visible at runtime because it was deleted + HiddenAnnotation = "false" + + // VisibleAnnotation is the value used to indicate that the location is visible at runtime + VisibleAnnotation = "true" +) + // Location represents a path relative to a particular filesystem resolved to a specific file.Reference. This struct is used as a key // in content fetching to uniquely identify a file relative to a request (the AccessPath). 
type Location struct { @@ -48,6 +59,9 @@ func (m *LocationMetadata) merge(other LocationMetadata) error { } func (l Location) WithAnnotation(key, value string) Location { + if key == "" || value == "" { + return l + } if l.Annotations == nil { l.Annotations = map[string]string{} } diff --git a/syft/internal/fileresolver/container_image_deep_squash.go b/syft/internal/fileresolver/container_image_deep_squash.go new file mode 100644 index 000000000..0666d6928 --- /dev/null +++ b/syft/internal/fileresolver/container_image_deep_squash.go @@ -0,0 +1,252 @@ +package fileresolver + +import ( + "context" + "io" + + "github.com/anchore/stereoscope/pkg/image" + "github.com/anchore/syft/syft/file" +) + +var _ file.Resolver = (*ContainerImageDeepSquash)(nil) + +// ContainerImageDeepSquash implements path and content access for the paths in the squashed tree, but with additional +// depth from all layers. The goal of this is to allow for producing results where the first layer which the material +// was added can be annotated in the SBOM (as opposed to the last [visible] layer for the path like with the squashed +// file resolver). +type ContainerImageDeepSquash struct { + squashed file.Resolver + allLayers file.Resolver +} + +// NewFromContainerImageDeepSquash returns a new resolver from the perspective of all image layers for the given image. +func NewFromContainerImageDeepSquash(img *image.Image) (*ContainerImageDeepSquash, error) { + squashed, err := NewFromContainerImageSquash(img) + if err != nil { + return nil, err + } + + allLayers, err := NewFromContainerImageAllLayers(img) + if err != nil { + return nil, err + } + + return &ContainerImageDeepSquash{ + squashed: squashed, + allLayers: allLayers, + }, nil +} + +// HasPath indicates if the given path exists in the underlying source. 
+func (i *ContainerImageDeepSquash) HasPath(path string) bool { + // there is no need to merge results from all layers since path-based results should always be adjusted relative to the squashed tree (which is different when considering layers) + return i.squashed.HasPath(path) +} + +// FilesByPath returns all file.References that match the given paths from any layer in the image. +func (i *ContainerImageDeepSquash) FilesByPath(paths ...string) ([]file.Location, error) { + squashedLocations, err := i.squashed.FilesByPath(paths...) + if err != nil { + return nil, err + } + + if len(squashedLocations) == 0 { + // this is meant to return all files in all layers only for paths that are present in the squashed tree. If + // there are no results from the squashed tree then there are no paths to raise up. + return nil, nil + } + + allLayersLocations, err := i.allLayers.FilesByPath(paths...) + if err != nil { + return nil, err + } + + return i.mergeLocations(squashedLocations, allLayersLocations), nil +} + +// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. +func (i *ContainerImageDeepSquash) FilesByGlob(patterns ...string) ([]file.Location, error) { + squashedLocations, err := i.squashed.FilesByGlob(patterns...) + if err != nil { + return nil, err + } + + if len(squashedLocations) == 0 { + // this is meant to return all files in all layers only for paths that are present in the squashed tree. If + // there are no results from the squashed tree then there are no paths to raise up. + return nil, nil + } + + allLayersLocations, err := i.allLayers.FilesByGlob(patterns...) + if err != nil { + return nil, err + } + + return i.mergeLocations(squashedLocations, allLayersLocations), nil +} + +// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. +// This is helpful when attempting to find a file that is in the same layer or lower as another file. 
+func (i *ContainerImageDeepSquash) RelativeFileByPath(location file.Location, path string) *file.Location { + if !i.squashed.HasPath(path) { + return nil + } + + l := i.squashed.RelativeFileByPath(location, path) + if l != nil { + loc := l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation) + return &loc + } + + l = i.allLayers.RelativeFileByPath(location, path) + if l != nil { + loc := l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation) + return &loc + } + return nil +} + +// FileContentsByLocation fetches file contents for a single file reference. +// If the path does not exist an error is returned. +func (i *ContainerImageDeepSquash) FileContentsByLocation(location file.Location) (io.ReadCloser, error) { + // regardless of the layer or scope, if the user gives us a specific path+layer location, then we should always + // return the contents for that specific location (thus all-layers scope must always be used) + return i.allLayers.FileContentsByLocation(location) +} + +func (i *ContainerImageDeepSquash) FilesByMIMEType(types ...string) ([]file.Location, error) { + squashedLocations, err := i.squashed.FilesByMIMEType(types...) + if err != nil { + return nil, err + } + + if len(squashedLocations) == 0 { + // this is meant to return all files in all layers only for paths that are present in the squashed tree. If + // there are no results from the squashed tree then there are no paths to raise up. + return nil, nil + } + + allLayersLocations, err := i.allLayers.FilesByMIMEType(types...) 
+ if err != nil { + return nil, err + } + + return i.mergeLocations(squashedLocations, allLayersLocations), nil +} + +func (i *ContainerImageDeepSquash) AllLocations(ctx context.Context) <-chan file.Location { + return i.mergeLocationStreams(ctx, i.squashed.AllLocations(ctx), i.allLayers.AllLocations(ctx)) +} + +func (i *ContainerImageDeepSquash) FileMetadataByLocation(location file.Location) (file.Metadata, error) { + // regardless of the layer or scope, if the user gives us a specific path+layer location, then we should always + // return the metadata for that specific location (thus all-layers scope must always be used) + return i.allLayers.FileMetadataByLocation(location) +} + +func (i *ContainerImageDeepSquash) mergeLocations(squashedLocations, allLayersLocations []file.Location) []file.Location { + var result []file.Location + + if len(squashedLocations) == 0 { + // this is meant to return all files in all layers only for paths that are present in the squashed tree. If + // there are no results from the squashed tree then there are no paths to raise up. + return nil + } + + // we are using a location set to deduplicate locations, but we don't use it for the returned + // results in order to preserve the order of the locations from the underlying filetree query + squashedCoords := file.NewLocationSet() + for _, l := range squashedLocations { + result = append(result, l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation)) + squashedCoords.Add(l) + } + + for _, l := range allLayersLocations { + if squashedCoords.Contains(l) { + // this path + layer is already in the squashed tree results, skip it (deduplicate location results) + continue + } + + if !i.squashed.HasPath(l.RealPath) { + // if we find a location for a path that matches the query (e.g. **/node_modules) but is not present in the squashed tree, skip it + continue + } + + // not only should the real path to the file exist, but the way we took to get there should also exist + // (e.g. 
if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should + // make certain that /etc/passwd-1 exists) + if l.AccessPath != "" && !i.squashed.HasPath(l.AccessPath) { + continue + } + + result = append(result, l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation)) + } + + return result +} + +// mergeLocationStreams returns a channel that yields every squashed location (annotated as visible) followed by the all-layers locations that were not already yielded and whose paths still exist in the squashed tree (annotated as hidden); both inputs are always drained. +func (i *ContainerImageDeepSquash) mergeLocationStreams(ctx context.Context, squashedLocations, allLayersLocations <-chan file.Location) <-chan file.Location { + result := make(chan file.Location) + go func() { + defer close(result) + + // we are using a location set to deduplicate locations, but we don't use it for the returned + // results in order to preserve the order of the locations from the underlying filetree query + squashedCoords := file.NewLocationSet() + var isDone bool + for l := range squashedLocations { + if isDone { + // bleed off the rest of the results from the squashed stream and not leak a goroutine + continue + } + // the send is a select case (rather than a default branch) so that cancellation can always unblock this goroutine, even if the consumer stopped receiving + select { + case <-ctx.Done(): + isDone = true + case result <- l.WithAnnotation(file.VisibleAnnotationKey, file.VisibleAnnotation): + squashedCoords.Add(l) + } + } + + for l := range allLayersLocations { + if isDone { + // bleed off the rest of the results from the all-layers stream and not leak a goroutine + continue + } + + if squashedCoords.Empty() { + // this is meant to return all files in all layers only for paths that are present in the squashed tree. + // If there are no results from the squashed tree, then there are no paths to raise up. + // That being said, we need to bleed off the rest of the results from the allLayersLocations stream + // and not leak a goroutine. + continue + } + + if squashedCoords.Contains(l) { + // we've already seen this location from the squashed stream, skip it + continue + } + + if !i.squashed.HasPath(l.RealPath) { + // if we find a location for a path that matches the query (e.g. 
**/node_modules) but is not present in the squashed tree, skip it + continue + } + + // not only should the real path to the file exist, but the way we took to get there should also exist + // (e.g. if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should + // make certain that /etc/passwd-1 exists) + if l.AccessPath != "" && !i.squashed.HasPath(l.AccessPath) { + continue + } + + select { + case <-ctx.Done(): + isDone = true + case result <- l.WithAnnotation(file.VisibleAnnotationKey, file.HiddenAnnotation): + } + } + }() + + return result +} diff --git a/syft/internal/fileresolver/container_image_deep_squash_test.go b/syft/internal/fileresolver/container_image_deep_squash_test.go new file mode 100644 index 000000000..c1a6945a3 --- /dev/null +++ b/syft/internal/fileresolver/container_image_deep_squash_test.go @@ -0,0 +1,1109 @@ +package fileresolver + +import ( + "context" + "fmt" + "io" + "sort" + "sync" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/syft/file" +) + +type mockSimpleResolver struct { + file.Resolver // embed to fulfill the interface, panics for stuff not implemented + paths *strset.Set + locations map[string][]file.Location +} + +func newMockSimpleResolver(locations []file.Location) *mockSimpleResolver { + paths := strset.New() + locationMap := make(map[string][]file.Location) + for _, loc := range locations { + paths.Add(loc.RealPath) + paths.Add(loc.AccessPath) + locationMap[loc.RealPath] = append(locationMap[loc.RealPath], loc) + } + return &mockSimpleResolver{ + paths: paths, + locations: locationMap, + } +} + +func (m *mockSimpleResolver) HasPath(p string) bool { + return m.paths.Has(p) +} + +func (m *mockSimpleResolver) FilesByPath(paths ...string) ([]file.Location, 
error) { + var results []file.Location + for _, path := range paths { + if locs, exists := m.locations[path]; exists { + results = append(results, locs...) + } + } + return results, nil +} + +func Test_ContainerImageDeepSquash_FilesByPath(t *testing.T) { + cases := []struct { + name string + linkPath string + resolveLayer uint + resolvePath string + forcePositiveHasPath bool + expectedRefs int + }{ + { + name: "link with previous data", + linkPath: "/link-1", + resolveLayer: 1, + resolvePath: "/file-1.txt", + expectedRefs: 1, + }, + { + name: "link with in layer data", + linkPath: "/link-within", + resolveLayer: 5, + resolvePath: "/file-3.txt", + expectedRefs: 1, + }, + { + name: "link with overridden data", + linkPath: "/link-2", + resolveLayer: 7, + resolvePath: "/file-2.txt", + expectedRefs: 2, + }, + { + name: "indirect link (with overridden data)", + linkPath: "/link-indirect", + resolveLayer: 7, + resolvePath: "/file-2.txt", + expectedRefs: 2, + }, + { + name: "dead link", + linkPath: "/link-dead", + resolveLayer: 8, + resolvePath: "", + // the path should exist, even if the link is dead + forcePositiveHasPath: true, + }, + { + name: "ignore directories", + linkPath: "/bin", + resolvePath: "", + // the path should exist, even if we ignore it + forcePositiveHasPath: true, + }, + { + name: "parent is a link (with overridden data)", + linkPath: "/parent-link/file-4.txt", + resolveLayer: 11, + resolvePath: "/parent/file-4.txt", + expectedRefs: 1, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := NewFromContainerImageDeepSquash(img) + require.NoError(t, err) + + hasPath := resolver.HasPath(c.linkPath) + if !c.forcePositiveHasPath { + if c.resolvePath != "" && !hasPath { + t.Errorf("expected HasPath() to indicate existance, but did not") + } else if c.resolvePath == "" && hasPath { + t.Errorf("expeced HasPath() to NOT indicate existance, but does") 
+ } + } else if !hasPath { + t.Errorf("expected HasPath() to indicate existance, but did not (force path)") + } + + refs, err := resolver.FilesByPath(c.linkPath) + require.NoError(t, err) + + expectedRefs := c.expectedRefs + if c.resolvePath == "" { + expectedRefs = 0 + } + + if len(refs) != expectedRefs { + t.Fatalf("unexpected number of resolutions: %d", len(refs)) + } + + if expectedRefs == 0 { + // nothing else to assert + return + } + + actual := refs[0] + + if string(actual.Reference().RealPath) != c.resolvePath { + t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) + } + + if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { + t.Errorf("we should always prefer real paths over ones with links") + } + + layer := img.FileCatalog.Layer(actual.Reference()) + + if layer.Metadata.Index != c.resolveLayer { + t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) + } + }) + } +} + +func Test_ContainerImageDeepSquash_FilesByGlob(t *testing.T) { + cases := []struct { + name string + glob string + resolveLayer uint + resolvePath string + expectedRefs int + }{ + { + name: "link with previous data", + glob: "**/link-1", + resolveLayer: 1, + resolvePath: "/file-1.txt", + expectedRefs: 1, + }, + { + name: "link with in layer data", + glob: "**/link-within", + resolveLayer: 5, + resolvePath: "/file-3.txt", + expectedRefs: 1, + }, + { + name: "link with overridden data", + glob: "**/link-2", + resolveLayer: 7, + resolvePath: "/file-2.txt", + expectedRefs: 2, + }, + { + name: "indirect link (with overridden data)", + glob: "**/link-indirect", + resolveLayer: 7, + resolvePath: "/file-2.txt", + expectedRefs: 2, + }, + { + name: "dead link", + glob: "**/link-dead", + // dead links are dead! 
they shouldn't match on globs + resolvePath: "", + }, + { + name: "ignore directories", + glob: "**/bin", + resolvePath: "", + }, + { + name: "parent without link", + glob: "**/parent/*.txt", + resolveLayer: 11, + resolvePath: "/parent/file-4.txt", + expectedRefs: 2, + }, + { + name: "parent is a link (override)", + glob: "**/parent-link/file-4.txt", + resolveLayer: 11, + resolvePath: "/parent/file-4.txt", + expectedRefs: 2, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := NewFromContainerImageDeepSquash(img) + require.NoError(t, err) + + refs, err := resolver.FilesByGlob(c.glob) + require.NoError(t, err) + + expectedRefs := c.expectedRefs + if c.resolvePath == "" { + expectedRefs = 0 + } + + if len(refs) != expectedRefs { + t.Fatalf("unexpected number of resolutions: %d", len(refs)) + } + + if expectedRefs == 0 { + // nothing else to assert + return + } + + actual := refs[0] + + if string(actual.Reference().RealPath) != c.resolvePath { + t.Errorf("bad resolve path: '%s'!='%s'", string(actual.Reference().RealPath), c.resolvePath) + } + + if c.resolvePath != "" && string(actual.Reference().RealPath) != actual.RealPath { + t.Errorf("we should always prefer real paths over ones with links") + } + + layer := img.FileCatalog.Layer(actual.Reference()) + + if layer.Metadata.Index != c.resolveLayer { + t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) + } + }) + } +} + +func Test_ContainerImageDeepSquash_FilesByMIMEType(t *testing.T) { + + tests := []struct { + fixtureName string + mimeType string + expectedPaths *strset.Set + }{ + { + fixtureName: "image-simple", + mimeType: "text/plain", + expectedPaths: strset.New("/somefile-1.txt", "/somefile-2.txt", "/really/nested/file-3.txt"), + }, + } + + for _, test := range tests { + t.Run(test.fixtureName, func(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", 
test.fixtureName) + + resolver, err := NewFromContainerImageDeepSquash(img) + assert.NoError(t, err) + + locations, err := resolver.FilesByMIMEType(test.mimeType) + assert.NoError(t, err) + + assert.Len(t, locations, test.expectedPaths.Size()) + for _, l := range locations { + assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath) + } + }) + } +} + +func Test_ContainerImageDeepSquash_hasFilesystemIDInLocation(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path") + + resolver, err := NewFromContainerImageDeepSquash(img) + assert.NoError(t, err) + + locations, err := resolver.FilesByMIMEType("text/plain") + assert.NoError(t, err) + assert.NotEmpty(t, locations) + for _, location := range locations { + assert.NotEmpty(t, location.FileSystemID) + } + + locations, err = resolver.FilesByGlob("*.txt") + assert.NoError(t, err) + assert.NotEmpty(t, locations) + for _, location := range locations { + assert.NotEmpty(t, location.FileSystemID) + } + + locations, err = resolver.FilesByPath("/somefile-1.txt") + assert.NoError(t, err) + assert.NotEmpty(t, locations) + for _, location := range locations { + assert.NotEmpty(t, location.FileSystemID) + } + +} + +func Test_ContainerImageDeepSquash_FilesContents(t *testing.T) { + + tests := []struct { + name string + path string + contents []string + }{ + { + name: "one degree", + path: "link-2", + contents: []string{ + "NEW file override!", + "file 2!", + }, + }, + { + name: "two degrees", + path: "link-indirect", + contents: []string{ + "NEW file override!", + "file 2!", + }, + }, + { + name: "dead link", + path: "link-dead", + contents: []string{}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := NewFromContainerImageDeepSquash(img) + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(test.path) + require.NoError(t, err) + 
assert.Len(t, refs, len(test.contents)) + + for idx, loc := range refs { + + reader, err := resolver.FileContentsByLocation(loc) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + assert.Equal(t, test.contents[idx], string(actual)) + } + }) + } +} + +func Test_ContainerImageDeepSquash_FilesContents_errorOnDirRequest(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := NewFromContainerImageDeepSquash(img) + assert.NoError(t, err) + + var dirLoc *file.Location + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + for loc := range resolver.AllLocations(ctx) { + // this is known to be a directory in the test fixture + if dirLoc == nil && loc.RealPath == "/parent" { + dirLoc = &loc + } + } + + require.NotNil(t, dirLoc) + + reader, err := resolver.FileContentsByLocation(*dirLoc) + require.Error(t, err) + require.Nil(t, reader) +} + +func Test_ContainerImageDeepSquash_resolvesLinks(t *testing.T) { + tests := []struct { + name string + runner func(file.Resolver) []file.Location + expected []file.Location + }{ + { + name: "by mimetype", + runner: func(resolver file.Resolver) []file.Location { + // links should not show up when searching mimetype + actualLocations, err := resolver.FilesByMIMEType("text/plain") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewVirtualLocation("/etc/group", "/etc/group"), + file.NewVirtualLocation("/etc/passwd", "/etc/passwd"), + file.NewVirtualLocation("/etc/shadow", "/etc/shadow"), + file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), + file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), + file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), + file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), + file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), + file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), + }, + }, + { + name: "by glob 
to links", + runner: func(resolver file.Resolver) []file.Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("*ink-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewVirtualLocation("/file-1.txt", "/link-1"), + file.NewVirtualLocation("/file-2.txt", "/link-2"), + file.NewVirtualLocation("/file-2.txt", "/link-2"), + + // though this is a link, and it matches to the file, the resolver de-duplicates files + // by the real path, so it is not included in the results + //file.NewVirtualLocation("/file-2.txt", "/link-indirect"), + + file.NewVirtualLocation("/file-3.txt", "/link-within"), + }, + }, + { + name: "by basename", + runner: func(resolver file.Resolver) []file.Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-2.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + // this has two copies in the base image, which overwrites the same location + file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), + file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), + }, + }, + { + name: "by basename glob", + runner: func(resolver file.Resolver) []file.Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-?.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), + file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), + file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), + file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), + file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), + file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), + }, + }, + { + name: "by basename glob to links", + runner: func(resolver file.Resolver) []file.Location { + actualLocations, err := 
resolver.FilesByGlob("**/link-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewVirtualLocation("/file-1.txt", "/link-1"), + file.NewVirtualLocation("/file-2.txt", "/link-2"), + file.NewVirtualLocation("/file-2.txt", "/link-2"), + + // we already have this real file path via another link, so only one is returned + // file.NewVirtualLocation("/file-2.txt", "/link-indirect"), + + file.NewVirtualLocation("/file-3.txt", "/link-within"), + }, + }, + { + name: "by extension", + runner: func(resolver file.Resolver) []file.Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/*.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + file.NewVirtualLocation("/file-1.txt", "/file-1.txt"), + file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), + file.NewVirtualLocation("/file-2.txt", "/file-2.txt"), + file.NewVirtualLocation("/file-3.txt", "/file-3.txt"), + file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), + file.NewVirtualLocation("/parent/file-4.txt", "/parent/file-4.txt"), + }, + }, + { + name: "by path to degree 1 link", + runner: func(resolver file.Resolver) []file.Location { + // links resolve to the final file + actualLocations, err := resolver.FilesByPath("/link-2") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + // we have multiple copies across layers + file.NewVirtualLocation("/file-2.txt", "/link-2"), + file.NewVirtualLocation("/file-2.txt", "/link-2"), + }, + }, + { + name: "by path to degree 2 link", + runner: func(resolver file.Resolver) []file.Location { + // a chain of multiple links resolves to the final file + actualLocations, err := resolver.FilesByPath("/link-indirect") + assert.NoError(t, err) + return actualLocations + }, + expected: []file.Location{ + // we have multiple copies across layers + file.NewVirtualLocation("/file-2.txt", "/link-indirect"), +
file.NewVirtualLocation("/file-2.txt", "/link-indirect"), + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := NewFromContainerImageDeepSquash(img) + assert.NoError(t, err) + + actual := test.runner(resolver) + + compareLocations(t, test.expected, actual) + }) + } + +} + +func Test_ContainerImageDeepSquash_AllLocations(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") + + resolver, err := NewFromContainerImageDeepSquash(img) + assert.NoError(t, err) + + paths := strset.New() + for loc := range resolver.AllLocations(context.Background()) { + paths.Add(loc.RealPath) + } + expected := []string{ + "/Dockerfile", + "/file-3.txt", + "/target", + "/target/file-2.txt", + } + + // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. + // this isn't important for the test, so we remove them. 
+ paths.Remove("/proc", "/sys", "/dev", "/etc") + + // remove cache created by Mac Rosetta when emulating different arches + paths.Remove("/.cache/rosetta", "/.cache") + + pathsList := paths.List() + sort.Strings(pathsList) + + assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) +} + +func TestContainerImageDeepSquash_MergeLocations(t *testing.T) { + tests := []struct { + name string + squashedLocations file.LocationSet + allLayersLocations file.LocationSet + expectedLocations int + expectedVisibleOnly bool + }{ + { + name: "empty squashed locations returns empty", + squashedLocations: file.NewLocationSet(), + allLayersLocations: file.NewLocationSet(makeLocation("/some/path", 1)), + expectedLocations: 0, + expectedVisibleOnly: false, + }, + { + name: "only squashed locations returns all as visible", + squashedLocations: file.NewLocationSet( + makeLocation("/path/one", 1), + makeLocation("/path/two", 1), + ), + allLayersLocations: file.NewLocationSet(), + expectedLocations: 2, + expectedVisibleOnly: true, + }, + { + name: "deduplicates matching locations between squashed and all layers + additional hidden locations", + squashedLocations: file.NewLocationSet(makeLocation("/path/one", 2)), + allLayersLocations: file.NewLocationSet(makeLocation("/path/one", 2), makeLocation("/path/one", 1)), + expectedLocations: 2, + expectedVisibleOnly: false, + }, + { + name: "deduplicates matching locations between squashed and all layers", + squashedLocations: file.NewLocationSet(makeLocation("/path/one", 1)), + allLayersLocations: file.NewLocationSet(makeLocation("/path/one", 1)), + expectedLocations: 1, + expectedVisibleOnly: true, + }, + { + name: "all layers locations with paths not in squashed tree are excluded", + squashedLocations: file.NewLocationSet(makeLocation("/path/one", 1)), + allLayersLocations: file.NewLocationSet( + makeLocation("/path/one", 1), // layer 2 version will 
be skipped (deduped) + makeLocation("/path/not/in/squashed", 2), // will be excluded due to path not in squashed + ), + expectedLocations: 1, + expectedVisibleOnly: true, + }, + { + name: "includes hidden locations from all layers when path in squashed tree", + squashedLocations: file.NewLocationSet(makeLocation("/path/one", 1), makeLocation("/path/two", 2)), + allLayersLocations: file.NewLocationSet( + makeLocation("/path/one", 1), // will be deduped + makeLocation("/path/one", 2), // will be included as hidden + makeLocation("/path/two", 2), // will be deduped + makeLocation("/path/two", 3), // will be included as hidden + makeLocation("/path/two", 4), // will be included as hidden + ), + expectedLocations: 5, // 2 from squashed + 3 hidden from all layers (1 for /path/one, 2 for /path/two) + expectedVisibleOnly: false, + }, + { + name: "complex scenario with multiple paths and layers", + squashedLocations: file.NewLocationSet( + makeLocation("/bin/bash", 1), + makeLocation("/etc/passwd", 2), + makeLocation("/var/log/syslog", 3), + ), + allLayersLocations: file.NewLocationSet( + makeLocation("/bin/bash", 1), // will be deduped + makeLocation("/bin/bash", 0), // will be included as hidden + makeLocation("/etc/passwd", 2), // will be deduped + makeLocation("/etc/passwd", 0), // will be included as hidden + makeLocation("/var/log/syslog", 3), // will be deduped + makeLocation("/var/log/syslog", 0), // will be included as hidden + makeLocation("/tmp/not-in-squash", 4), // will be excluded - not in squashed + ), + expectedLocations: 6, // 3 from squashed + 3 hidden from all layers + expectedVisibleOnly: false, + }, + { + name: "include virtual locations", + squashedLocations: file.NewLocationSet( + makeLocation("/path/one", 1), + makeLocation("/path/two", 2), + makeLocation("/path/to-one", 2), // a symlink + ), + allLayersLocations: file.NewLocationSet( + makeLocation("/path/one", 1), // will be deduped + makeVirtualLocation("/path/one", "/path/to-one", 2), + ), + expectedLocations: 4, +
expectedVisibleOnly: false, + }, + { + name: "don't include hidden virtual locations", + squashedLocations: file.NewLocationSet( + makeLocation("/path/one", 1), + ), + allLayersLocations: file.NewLocationSet( + makeLocation("/path/one", 1), // will be deduped + makeVirtualLocation("/path/one", "/path/to-one", 2), // would have been included if /path/to-one was in the squash tree + ), + expectedLocations: 1, + expectedVisibleOnly: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + resolver := &ContainerImageDeepSquash{ + squashed: newMockSimpleResolver(tt.squashedLocations.ToSlice()), + allLayers: newMockSimpleResolver(tt.allLayersLocations.ToSlice()), + } + + squashedLocations := tt.squashedLocations.ToSlice() + allLayersLocations := tt.allLayersLocations.ToSlice() + + mergedLocations := resolver.mergeLocations(squashedLocations, allLayersLocations) + + require.Len(t, mergedLocations, tt.expectedLocations, "incorrect number of merged locations (expected %d, found %d)", tt.expectedLocations, len(mergedLocations)) + + if tt.expectedLocations > 0 { + onlyVisible := true + for _, loc := range mergedLocations { + if annotation, ok := loc.Annotations[file.VisibleAnnotationKey]; ok { + if annotation != file.VisibleAnnotation { + onlyVisible = false + break + } + } + } + assert.Equal(t, tt.expectedVisibleOnly, onlyVisible, "visibility annotation check failed") + + } + + visibleCount := 0 + hiddenCount := 0 + for _, loc := range mergedLocations { + if annotation, ok := loc.Annotations[file.VisibleAnnotationKey]; ok { + if annotation == file.VisibleAnnotation { + visibleCount++ + } else if annotation == file.HiddenAnnotation { + hiddenCount++ + } + } + } + + // for test cases where we expect some hidden annotations... 
+ if !tt.expectedVisibleOnly && tt.expectedLocations > 0 { + assert.Greater(t, hiddenCount, 0, "expected some hidden locations but found none") + assert.Greater(t, visibleCount, 0, "expected some visible locations but found none") + } + + // for test cases where we expect only visible annotations... + if tt.expectedVisibleOnly && tt.expectedLocations > 0 { + assert.Equal(t, tt.expectedLocations, visibleCount, "incorrect number of visible locations") + assert.Equal(t, 0, hiddenCount, "found hidden locations when expecting only visible") + } + }) + } +} + +func TestContainerImageDeepSquash_MergeLocationStreams(t *testing.T) { + tests := []struct { + name string + squashedLocations []file.Location + allLayersLocations []file.Location + expectedLocations int + expectedVisibleOnly bool + }{ + { + name: "empty squashed locations returns empty", + squashedLocations: []file.Location{}, + allLayersLocations: []file.Location{makeLocation("/some/path", 1)}, + expectedLocations: 0, + expectedVisibleOnly: false, + }, + { + name: "only squashed locations returns all as visible", + squashedLocations: []file.Location{ + makeLocation("/path/one", 1), + makeLocation("/path/two", 1), + }, + allLayersLocations: []file.Location{}, + expectedLocations: 2, + expectedVisibleOnly: true, + }, + { + name: "exact match locations are deduped", + squashedLocations: []file.Location{makeLocation("/path/one", 1)}, + allLayersLocations: []file.Location{makeLocation("/path/one", 1)}, + expectedLocations: 1, + expectedVisibleOnly: true, + }, + { + name: "different layers same path not deduped", + squashedLocations: []file.Location{makeLocation("/path/one", 2)}, + allLayersLocations: []file.Location{makeLocation("/path/one", 1)}, + expectedLocations: 2, // 1 visible from squashed + 1 hidden from all layers + expectedVisibleOnly: false, + }, + { + name: "all layers with path not in squashed are excluded", + squashedLocations: []file.Location{makeLocation("/path/one", 1)}, + allLayersLocations: 
[]file.Location{ + makeLocation("/path/one", 2), + makeLocation("/not/in/squashed", 3), + }, + expectedLocations: 2, // 1 from squashed + 1 from all layers (path/one) + expectedVisibleOnly: false, + }, + { + name: "includes all layer versions for paths in squashed", + squashedLocations: []file.Location{ + makeLocation("/path/one", 3), + makeLocation("/path/two", 2), + }, + allLayersLocations: []file.Location{ + makeLocation("/path/one", 1), + makeLocation("/path/one", 2), + makeLocation("/path/two", 2), // will be deduped + makeLocation("/path/two", 3), + makeLocation("/path/two", 4), + }, + expectedLocations: 6, // 2 from squashed + 4 from all layers + expectedVisibleOnly: false, + }, + { + name: "complex scenario with multiple paths and layers", + squashedLocations: []file.Location{ + makeLocation("/bin/bash", 5), + makeLocation("/etc/passwd", 3), + makeLocation("/var/log/syslog", 2), + }, + allLayersLocations: []file.Location{ + makeLocation("/bin/bash", 1), + makeLocation("/bin/bash", 2), + makeLocation("/bin/bash", 3), + makeLocation("/bin/bash", 4), + makeLocation("/bin/bash", 5), // will be deduped + makeLocation("/etc/passwd", 1), + makeLocation("/etc/passwd", 2), + makeLocation("/etc/passwd", 3), // will be deduped + makeLocation("/var/log/syslog", 1), + makeLocation("/var/log/syslog", 2), // will be deduped + makeLocation("/tmp/not-in-squash", 1), // not included + }, + expectedLocations: 10, // 3 from squashed + 7 from all layers (4 of 11 excluded: 3 deduped, 1 path not in squashed) + expectedVisibleOnly: false, + }, + { + name: "include virtual locations", + squashedLocations: []file.Location{ + makeLocation("/path/one", 1), + makeLocation("/path/two", 2), + makeLocation("/path/to-one", 2), // a symlink + }, + allLayersLocations: []file.Location{ + makeLocation("/path/one", 1), // will be deduped + makeVirtualLocation("/path/one", "/path/to-one", 2), + }, + expectedLocations: 4, + expectedVisibleOnly: false, + }, + { + name: "don't include hidden virtual locations", +
squashedLocations: []file.Location{ + makeLocation("/path/one", 1), + }, + allLayersLocations: []file.Location{ + makeLocation("/path/one", 1), // will be deduped + makeVirtualLocation("/path/one", "/path/to-one", 2), // would have been included if /path/to-one was in the squash tree + }, + expectedLocations: 1, + expectedVisibleOnly: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + resolver := &ContainerImageDeepSquash{ + squashed: newMockSimpleResolver(tt.squashedLocations), + } + + squashedChan := make(chan file.Location) + allLayersChan := make(chan file.Location) + + wg := &sync.WaitGroup{} + wg.Add(2) + + go func() { + defer wg.Done() + defer close(squashedChan) + for _, loc := range tt.squashedLocations { + squashedChan <- loc + } + }() + + go func() { + defer wg.Done() + defer close(allLayersChan) + for _, loc := range tt.allLayersLocations { + allLayersChan <- loc + } + }() + + mergedChan := resolver.mergeLocationStreams(ctx, squashedChan, allLayersChan) + + var mergedLocations []file.Location + for loc := range mergedChan { + mergedLocations = append(mergedLocations, loc) + } + + assert.Equal(t, tt.expectedLocations, len(mergedLocations), "incorrect number of merged locations") + + visibleCount := 0 + hiddenCount := 0 + duplicateFound := false + + // track seen locations to verify deduplication + seenLocations := make(map[file.LocationData]int) + + for _, loc := range mergedLocations { + // check for duplicates + seenLocations[loc.LocationData]++ + if seenLocations[loc.LocationData] > 1 { + duplicateFound = true + } + + // count annotations + if annotation, ok := loc.Annotations[file.VisibleAnnotationKey]; ok { + if annotation == file.VisibleAnnotation { + visibleCount++ + } else if annotation == file.HiddenAnnotation { + hiddenCount++ + } + } + } + + assert.False(t, duplicateFound, "found duplicate locations when none expected") + + // check 
visibility annotations + if tt.expectedVisibleOnly && len(mergedLocations) > 0 { + assert.Equal(t, len(mergedLocations), visibleCount, + "incorrect number of visible locations") + assert.Equal(t, 0, hiddenCount, + "found hidden locations when expecting only visible") + } + + if !tt.expectedVisibleOnly && len(mergedLocations) > 0 { + assert.Greater(t, hiddenCount, 0, + "expected some hidden locations but found none") + assert.Greater(t, visibleCount, 0, + "expected some visible locations but found none") + } + + wg.Wait() + + goleak.VerifyNone(t) + }) + } +} + +func TestContainerImageDeepSquash_MergeLocationStreams_FunCases(t *testing.T) { + + t.Run("concurrent context cancellation", func(t *testing.T) { + upstreamCtx, upstreamCancel := context.WithCancel(context.Background()) + + ctx, cancel := context.WithCancel(context.Background()) + + resolver := &ContainerImageDeepSquash{ + squashed: newMockSimpleResolver(nil), + } + + squashedChan := make(chan file.Location) + allLayersChan := make(chan file.Location) + + wg := &sync.WaitGroup{} + wg.Add(2) + + go func() { + defer wg.Done() + defer close(squashedChan) + + count := 0 + for { + count++ + loc := makeLocation(fmt.Sprintf("/path/%d", count), 2) + select { + case <-upstreamCtx.Done(): + return + case squashedChan <- loc: + } + } + }() + + go func() { + defer wg.Done() + defer close(allLayersChan) + + count := 0 + for { + count++ + loc := makeLocation(fmt.Sprintf("/path/%d", count), 2) + select { + case <-upstreamCtx.Done(): + return + case allLayersChan <- loc: + } + } + }() + + mergedChan := resolver.mergeLocationStreams(ctx, squashedChan, allLayersChan) + + go func() { + <-time.After(5 * time.Millisecond) + cancel() + time.Sleep(10 * time.Millisecond) + upstreamCancel() + }() + + for range mergedChan { + // drain + } + wg.Wait() + + goleak.VerifyNone(t) + }) + + t.Run("empty streams", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + resolver := 
&ContainerImageDeepSquash{ + squashed: newMockSimpleResolver([]file.Location{}), + } + + squashedChan := make(chan file.Location) + allLayersChan := make(chan file.Location) + close(squashedChan) + close(allLayersChan) + + mergedChan := resolver.mergeLocationStreams(ctx, squashedChan, allLayersChan) + + var count int + // should return immediately with no results (not block) + for range mergedChan { + count++ + } + assert.Equal(t, 0, count, "expected no results from empty streams") + }) + + t.Run("squashed empty but all layers has data", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + resolver := &ContainerImageDeepSquash{ + squashed: newMockSimpleResolver([]file.Location{}), + } + + squashedChan := make(chan file.Location) + allLayersChan := make(chan file.Location) + close(squashedChan) + + wg := &sync.WaitGroup{} + wg.Add(1) + + go func() { + defer close(allLayersChan) + defer wg.Done() + + allLayersChan <- makeLocation("/path/one", 1) + }() + + mergedChan := resolver.mergeLocationStreams(ctx, squashedChan, allLayersChan) + + // should return no results since squashed is empty + var count int + for range mergedChan { + count++ + } + + wg.Wait() + + assert.Equal(t, 0, count, "expected no results when squashed is empty") + }) +} + +func makeLocation(path string, layer int) file.Location { + return file.NewLocationFromCoordinates(file.Coordinates{ + RealPath: path, + FileSystemID: fmt.Sprintf("layer-%d", layer), + }) +} + +func makeVirtualLocation(path, access string, layer int) file.Location { + return file.NewVirtualLocationFromCoordinates(file.Coordinates{ + RealPath: path, + FileSystemID: fmt.Sprintf("layer-%d", layer), + }, access) +} diff --git a/syft/internal/fileresolver/filetree_resolver_test.go b/syft/internal/fileresolver/filetree_resolver_test.go index 7687a41cc..bd5bda010 100644 --- a/syft/internal/fileresolver/filetree_resolver_test.go +++ b/syft/internal/fileresolver/filetree_resolver_test.go @@ 
-1542,8 +1542,6 @@ func Test_fileResolver_FileContentsByLocation(t *testing.T) { } func TestFileResolver_AllLocations_errorOnDirRequest(t *testing.T) { - defer goleak.VerifyNone(t) - filePath := "./test-fixtures/system_paths/target/home/place" parentPath, err := absoluteSymlinkFreePathToParent(filePath) require.NoError(t, err) @@ -1557,9 +1555,8 @@ func TestFileResolver_AllLocations_errorOnDirRequest(t *testing.T) { for loc := range resolver.AllLocations(ctx) { entry, err := resolver.index.Get(loc.Reference()) require.NoError(t, err) - if entry.Metadata.IsDir() { + if dirLoc == nil && entry.Metadata.IsDir() { dirLoc = &loc - break } } @@ -1568,6 +1565,8 @@ func TestFileResolver_AllLocations_errorOnDirRequest(t *testing.T) { reader, err := resolver.FileContentsByLocation(*dirLoc) require.Error(t, err) require.Nil(t, reader) + + goleak.VerifyNone(t) } func TestFileResolver_AllLocations(t *testing.T) { @@ -1592,10 +1591,11 @@ func TestFileResolver_AllLocations(t *testing.T) { sort.Strings(pathsList) assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) + + goleak.VerifyNone(t) } func Test_FileResolver_AllLocationsDoesNotLeakGoRoutine(t *testing.T) { - defer goleak.VerifyNone(t) filePath := "./test-fixtures/system_paths/target/home/place" parentPath, err := absoluteSymlinkFreePathToParent(filePath) require.NoError(t, err) @@ -1609,4 +1609,6 @@ func Test_FileResolver_AllLocationsDoesNotLeakGoRoutine(t *testing.T) { break } cancel() + + goleak.VerifyNone(t) } diff --git a/syft/source/scope.go b/syft/source/scope.go index b1c9a7b7f..02df5cdf0 100644 --- a/syft/source/scope.go +++ b/syft/source/scope.go @@ -12,12 +12,15 @@ const ( SquashedScope Scope = "squashed" // AllLayersScope indicates to catalog content on all layers, regardless if it is visible from the container at runtime. 
AllLayersScope Scope = "all-layers" + // DeepSquashedScope indicates to catalog content on all layers, but only include content visible from the squashed filesystem representation. + DeepSquashedScope Scope = "deep-squashed" ) // AllScopes is a slice containing all possible scope options var AllScopes = []Scope{ SquashedScope, AllLayersScope, + DeepSquashedScope, } // ParseScope returns a scope as indicated from the given string. @@ -25,8 +28,10 @@ func ParseScope(userStr string) Scope { switch strings.ToLower(userStr) { case SquashedScope.String(): return SquashedScope - case "alllayers", AllLayersScope.String(): + case "all", "alllayers", AllLayersScope.String(): return AllLayersScope + case "deepsquashed", "squasheddeep", "squashed-deep", "deep-squash", "deepsquash", strings.ToLower(DeepSquashedScope.String()): + return DeepSquashedScope } return UnknownScope } diff --git a/syft/source/scope_test.go b/syft/source/scope_test.go index d35eddde6..dd7d728fa 100644 --- a/syft/source/scope_test.go +++ b/syft/source/scope_test.go @@ -20,6 +20,11 @@ func TestParseScope(t *testing.T) { name: "all-layers", want: AllLayersScope, }, + + { + name: "deep-squashed", + want: DeepSquashedScope, + }, // fall back to unknown { name: "make-believe", @@ -48,6 +53,31 @@ func TestParseScope(t *testing.T) { name: "alLlaYerS", want: AllLayersScope, }, + // aliases + { + name: "all", + want: AllLayersScope, + }, + { + name: "deep-squash", + want: DeepSquashedScope, + }, + { + name: "deepsquashed", + want: DeepSquashedScope, + }, + { + name: "squasheddeep", + want: DeepSquashedScope, + }, + { + name: "squashed-deep", + want: DeepSquashedScope, + }, + { + name: "deepsquash", + want: DeepSquashedScope, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/syft/source/stereoscopesource/image_source.go b/syft/source/stereoscopesource/image_source.go index 71afe3cd8..3b23027dc 100644 --- a/syft/source/stereoscopesource/image_source.go +++ 
b/syft/source/stereoscopesource/image_source.go @@ -103,6 +103,8 @@ func (s stereoscopeImageSource) FileResolver(scope source.Scope) (file.Resolver, res, err = fileresolver.NewFromContainerImageSquash(s.image) case source.AllLayersScope: res, err = fileresolver.NewFromContainerImageAllLayers(s.image) + case source.DeepSquashedScope: + res, err = fileresolver.NewFromContainerImageDeepSquash(s.image) default: return nil, fmt.Errorf("bad image scope provided: %+v", scope) }