diff --git a/syft/internal/fileresolver/container_image_all_layers.go b/syft/internal/fileresolver/container_image_all_layers.go index a2aab3983..ad7e9e0d3 100644 --- a/syft/internal/fileresolver/container_image_all_layers.go +++ b/syft/internal/fileresolver/container_image_all_layers.go @@ -16,8 +16,9 @@ var _ file.Resolver = (*ContainerImageAllLayers)(nil) // ContainerImageAllLayers implements path and content access for the AllLayers source option for container image data sources. type ContainerImageAllLayers struct { - img *image.Image - layers []int + img *image.Image + layers []int + markVisibility bool } // NewFromContainerImageAllLayers returns a new resolver from the perspective of all image layers for the given image. @@ -33,6 +34,10 @@ func NewFromContainerImageAllLayers(img *image.Image) (*ContainerImageAllLayers, return &ContainerImageAllLayers{ img: img, layers: layers, + // This is the entrypoint for the user-facing implementation, which should always annotate locations. + // We have other resolvers that use this implementation that are already responsible + // for marking visibility, so we don't need to do it all of the time (a small performance optimization). 
+ markVisibility: true, }, nil } @@ -112,7 +117,9 @@ func (r *ContainerImageAllLayers) FilesByPath(paths ...string) ([]file.Location, return nil, err } for _, result := range results { - uniqueLocations = append(uniqueLocations, file.NewLocationFromImage(path, result, r.img)) + l := file.NewLocationFromImage(path, result, r.img) + r.annotateLocation(&l) + uniqueLocations = append(uniqueLocations, l) } } } @@ -156,7 +163,9 @@ func (r *ContainerImageAllLayers) FilesByGlob(patterns ...string) ([]file.Locati return nil, err } for _, refResult := range refResults { - uniqueLocations = append(uniqueLocations, file.NewLocationFromImage(string(result.RequestPath), refResult, r.img)) + l := file.NewLocationFromImage(string(result.RequestPath), refResult, r.img) + r.annotateLocation(&l) + uniqueLocations = append(uniqueLocations, l) } } } @@ -172,7 +181,7 @@ func (r *ContainerImageAllLayers) RelativeFileByPath(location file.Location, pat exists, relativeRef, err := layer.SquashedTree.File(stereoscopeFile.Path(path), filetree.FollowBasenameLinks) if err != nil { - log.Errorf("failed to find path=%q in squash: %+w", path, err) + log.Errorf("failed to find path=%q in squash: %+v", path, err) return nil } if !exists && !relativeRef.HasReference() { @@ -180,6 +189,7 @@ func (r *ContainerImageAllLayers) RelativeFileByPath(location file.Location, pat } relativeLocation := file.NewLocationFromImage(path, *relativeRef.Reference, r.img) + r.annotateLocation(&relativeLocation) return &relativeLocation } @@ -228,7 +238,9 @@ func (r *ContainerImageAllLayers) FilesByMIMEType(types ...string) ([]file.Locat return nil, err } for _, refResult := range refResults { - uniqueLocations = append(uniqueLocations, file.NewLocationFromImage(string(ref.RequestPath), refResult, r.img)) + l := file.NewLocationFromImage(string(ref.RequestPath), refResult, r.img) + r.annotateLocation(&l) + uniqueLocations = append(uniqueLocations, l) } } } @@ -243,10 +255,12 @@ func (r *ContainerImageAllLayers) 
AllLocations(ctx context.Context) <-chan file. for _, layerIdx := range r.layers { tree := r.img.Layers[layerIdx].Tree for _, ref := range tree.AllFiles(stereoscopeFile.AllTypes()...) { + l := file.NewLocationFromImage(string(ref.RealPath), ref, r.img) + r.annotateLocation(&l) select { case <-ctx.Done(): return - case results <- file.NewLocationFromImage(string(ref.RealPath), ref, r.img): + case results <- l: continue } } @@ -258,3 +272,36 @@ func (r *ContainerImageAllLayers) AllLocations(ctx context.Context) <-chan file. func (r *ContainerImageAllLayers) FileMetadataByLocation(location file.Location) (file.Metadata, error) { return fileMetadataByLocation(r.img, location) } + +func (r *ContainerImageAllLayers) annotateLocation(l *file.Location) { + if !r.markVisibility || l == nil { + return + } + + givenRef := l.Reference() + annotation := file.VisibleAnnotation + + // if we find a location for a path that matches the query (e.g. **/node_modules) but is not present in the squashed tree, mark it as hidden (the location is still returned) + ref, err := r.img.SquashedSearchContext.SearchByPath(l.RealPath, filetree.DoNotFollowDeadBasenameLinks) + if err != nil || !ref.HasReference() { + annotation = file.HiddenAnnotation + } else if ref.ID() != givenRef.ID() { + // the path exists in the squashed tree, but it resolves to a different file reference than this location, so this location is hidden + annotation = file.HiddenAnnotation + } + + // not only should the real path to the file exist, but the way we took to get there should also exist + // (e.g. 
if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should + // make certain that /etc/passwd-1 exists) + if annotation == file.VisibleAnnotation && l.AccessPath != "" { + ref, err := r.img.SquashedSearchContext.SearchByPath(l.AccessPath, filetree.DoNotFollowDeadBasenameLinks) + if err != nil || !ref.HasReference() { + annotation = file.HiddenAnnotation + } else if ref.ID() != givenRef.ID() { + // we may have the path in the squashed tree, but this must not be in the same layer + annotation = file.HiddenAnnotation + } + } + + l.Annotations[file.VisibleAnnotationKey] = annotation +} diff --git a/syft/internal/fileresolver/container_image_all_layers_test.go b/syft/internal/fileresolver/container_image_all_layers_test.go index cee562b9d..7d36baa8b 100644 --- a/syft/internal/fileresolver/container_image_all_layers_test.go +++ b/syft/internal/fileresolver/container_image_all_layers_test.go @@ -522,16 +522,26 @@ func TestAllLayersResolver_AllLocations(t *testing.T) { paths := strset.New() ctx, cancel := context.WithCancel(context.Background()) defer cancel() + visibleSet := strset.New() + hiddenSet := strset.New() for loc := range resolver.AllLocations(ctx) { paths.Add(loc.RealPath) + switch loc.Annotations[file.VisibleAnnotationKey] { + case file.VisibleAnnotation: + visibleSet.Add(loc.RealPath) + case file.HiddenAnnotation: + hiddenSet.Add(loc.RealPath) + case "": + t.Errorf("expected visibility annotation for location: %+v", loc) + } } - expected := []string{ + visible := []string{ "/Dockerfile", - "/file-1.txt", - "/file-3.txt", + "/file-3.txt", // this is a deadlink pointing to /file-1.txt (which has been deleted) "/target", "/target/file-2.txt", - + } + hidden := []string{ "/.wh.bin", "/.wh.file-1.txt", "/.wh.lib", @@ -619,6 +629,7 @@ func TestAllLayersResolver_AllLocations(t *testing.T) { "/bin/usleep", "/bin/watch", "/bin/zcat", + "/file-1.txt", "/lib", "/lib/apk", "/lib/apk/db", @@ -641,15 +652,41 @@ func 
TestAllLayersResolver_AllLocations(t *testing.T) { "/lib/sysctl.d/00-alpine.conf", } - // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. - // this isn't important for the test, so we remove them. - paths.Remove("/proc", "/sys", "/dev", "/etc") + var expected []string + expected = append(expected, visible...) + expected = append(expected, hidden...) + sort.Strings(expected) - // Remove cache created by Mac Rosetta when emulating different arches - paths.Remove("/.cache/rosetta", "/.cache") + cleanPaths := func(s *strset.Set) { + // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. + // this isn't important for the test, so we remove them. + s.Remove("/proc", "/sys", "/dev", "/etc") + + // Remove cache created by Mac Rosetta when emulating different arches + s.Remove("/.cache/rosetta", "/.cache") + } + + cleanPaths(paths) + cleanPaths(visibleSet) + cleanPaths(hiddenSet) pathsList := paths.List() sort.Strings(pathsList) + visibleSetList := visibleSet.List() + sort.Strings(visibleSetList) + hiddenSetList := hiddenSet.List() + sort.Strings(hiddenSetList) + + if d := cmp.Diff(expected, pathsList); d != "" { + t.Errorf("unexpected paths (-want +got):\n%s", d) + } + + if d := cmp.Diff(visible, visibleSetList); d != "" { + t.Errorf("unexpected visible paths (-want +got):\n%s", d) + } + + if d := cmp.Diff(hidden, hiddenSetList); d != "" { + t.Errorf("unexpected hidden paths (-want +got):\n%s", d) + } - assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) } diff --git a/syft/internal/fileresolver/container_image_deep_squash.go b/syft/internal/fileresolver/container_image_deep_squash.go index 0666d6928..f8ecc9af2 100644 --- a/syft/internal/fileresolver/container_image_deep_squash.go +++ 
b/syft/internal/fileresolver/container_image_deep_squash.go @@ -31,6 +31,9 @@ func NewFromContainerImageDeepSquash(img *image.Image) (*ContainerImageDeepSquas return nil, err } + // we will do the work here to mark visibility with results from two resolvers (don't do the work twice!) + allLayers.markVisibility = false + return &ContainerImageDeepSquash{ squashed: squashed, allLayers: allLayers,