annotate hidden paths in all-layers scope (#3855)

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2025-05-06 09:50:04 -04:00 committed by GitHub
parent 1ba1186410
commit 7b25ea5eda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 104 additions and 17 deletions

View File

@ -18,6 +18,7 @@ var _ file.Resolver = (*ContainerImageAllLayers)(nil)
type ContainerImageAllLayers struct { type ContainerImageAllLayers struct {
img *image.Image img *image.Image
layers []int layers []int
markVisibility bool
} }
// NewFromContainerImageAllLayers returns a new resolver from the perspective of all image layers for the given image. // NewFromContainerImageAllLayers returns a new resolver from the perspective of all image layers for the given image.
@ -33,6 +34,10 @@ func NewFromContainerImageAllLayers(img *image.Image) (*ContainerImageAllLayers,
return &ContainerImageAllLayers{ return &ContainerImageAllLayers{
img: img, img: img,
layers: layers, layers: layers,
// This is the entrypoint for the user-facing implementation, which should always annotate locations.
// We have other resolvers that use this implementation that are already responsible
// for marking visibility, so we don't need to do it all of the time (a small performance optimization).
markVisibility: true,
}, nil }, nil
} }
@ -112,7 +117,9 @@ func (r *ContainerImageAllLayers) FilesByPath(paths ...string) ([]file.Location,
return nil, err return nil, err
} }
for _, result := range results { for _, result := range results {
uniqueLocations = append(uniqueLocations, file.NewLocationFromImage(path, result, r.img)) l := file.NewLocationFromImage(path, result, r.img)
r.annotateLocation(&l)
uniqueLocations = append(uniqueLocations, l)
} }
} }
} }
@ -156,7 +163,9 @@ func (r *ContainerImageAllLayers) FilesByGlob(patterns ...string) ([]file.Locati
return nil, err return nil, err
} }
for _, refResult := range refResults { for _, refResult := range refResults {
uniqueLocations = append(uniqueLocations, file.NewLocationFromImage(string(result.RequestPath), refResult, r.img)) l := file.NewLocationFromImage(string(result.RequestPath), refResult, r.img)
r.annotateLocation(&l)
uniqueLocations = append(uniqueLocations, l)
} }
} }
} }
@ -172,7 +181,7 @@ func (r *ContainerImageAllLayers) RelativeFileByPath(location file.Location, pat
exists, relativeRef, err := layer.SquashedTree.File(stereoscopeFile.Path(path), filetree.FollowBasenameLinks) exists, relativeRef, err := layer.SquashedTree.File(stereoscopeFile.Path(path), filetree.FollowBasenameLinks)
if err != nil { if err != nil {
log.Errorf("failed to find path=%q in squash: %+w", path, err) log.Errorf("failed to find path=%q in squash: %+v", path, err)
return nil return nil
} }
if !exists && !relativeRef.HasReference() { if !exists && !relativeRef.HasReference() {
@ -180,6 +189,7 @@ func (r *ContainerImageAllLayers) RelativeFileByPath(location file.Location, pat
} }
relativeLocation := file.NewLocationFromImage(path, *relativeRef.Reference, r.img) relativeLocation := file.NewLocationFromImage(path, *relativeRef.Reference, r.img)
r.annotateLocation(&relativeLocation)
return &relativeLocation return &relativeLocation
} }
@ -228,7 +238,9 @@ func (r *ContainerImageAllLayers) FilesByMIMEType(types ...string) ([]file.Locat
return nil, err return nil, err
} }
for _, refResult := range refResults { for _, refResult := range refResults {
uniqueLocations = append(uniqueLocations, file.NewLocationFromImage(string(ref.RequestPath), refResult, r.img)) l := file.NewLocationFromImage(string(ref.RequestPath), refResult, r.img)
r.annotateLocation(&l)
uniqueLocations = append(uniqueLocations, l)
} }
} }
} }
@ -243,10 +255,12 @@ func (r *ContainerImageAllLayers) AllLocations(ctx context.Context) <-chan file.
for _, layerIdx := range r.layers { for _, layerIdx := range r.layers {
tree := r.img.Layers[layerIdx].Tree tree := r.img.Layers[layerIdx].Tree
for _, ref := range tree.AllFiles(stereoscopeFile.AllTypes()...) { for _, ref := range tree.AllFiles(stereoscopeFile.AllTypes()...) {
l := file.NewLocationFromImage(string(ref.RealPath), ref, r.img)
r.annotateLocation(&l)
select { select {
case <-ctx.Done(): case <-ctx.Done():
return return
case results <- file.NewLocationFromImage(string(ref.RealPath), ref, r.img): case results <- l:
continue continue
} }
} }
@ -258,3 +272,36 @@ func (r *ContainerImageAllLayers) AllLocations(ctx context.Context) <-chan file.
func (r *ContainerImageAllLayers) FileMetadataByLocation(location file.Location) (file.Metadata, error) { func (r *ContainerImageAllLayers) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
return fileMetadataByLocation(r.img, location) return fileMetadataByLocation(r.img, location)
} }
// annotateLocation records on the given location whether it is visible or hidden relative to the
// squashed view of the image, by writing file.VisibleAnnotation or file.HiddenAnnotation under
// file.VisibleAnnotationKey in the location's annotations.
//
// It does nothing when the resolver was configured not to mark visibility or when l is nil.
//
// A location is marked visible only when its real path — and, when set, its access path — can be
// found in the squashed search context and resolves to the very same file reference the location
// carries. In every other case (lookup error, no reference found, or a different reference ID) the
// location is marked hidden.
func (r *ContainerImageAllLayers) annotateLocation(l *file.Location) {
	if !r.markVisibility || l == nil {
		return
	}

	given := l.Reference()

	// matchesSquashed reports whether the path exists in the squashed tree and points at the same
	// reference as this location. A path that is present but backed by a different reference is a
	// different file than the one this location describes, so it does not count as a match.
	matchesSquashed := func(p string) bool {
		found, err := r.img.SquashedSearchContext.SearchByPath(p, filetree.DoNotFollowDeadBasenameLinks)
		if err != nil || !found.HasReference() {
			return false
		}
		return found.ID() == given.ID()
	}

	// Locations that match a query (e.g. **/node_modules) but are not what the squashed tree holds
	// at that real path are hidden. The access path is only consulted once the real path has
	// passed, and only when one is set: the route taken to reach the file must also still exist
	// (e.g. when /etc/passwd was reached through a link, both ends must be present).
	isVisible := matchesSquashed(l.RealPath)
	if isVisible && l.AccessPath != "" {
		isVisible = matchesSquashed(l.AccessPath)
	}

	annotation := file.VisibleAnnotation
	if !isVisible {
		annotation = file.HiddenAnnotation
	}

	// NOTE(review): this write assumes l.Annotations has already been initialized by whoever built
	// the location — if it is a nil map this would panic; confirm against the location constructor.
	l.Annotations[file.VisibleAnnotationKey] = annotation
}

View File

@ -522,16 +522,26 @@ func TestAllLayersResolver_AllLocations(t *testing.T) {
paths := strset.New() paths := strset.New()
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
visibleSet := strset.New()
hiddenSet := strset.New()
for loc := range resolver.AllLocations(ctx) { for loc := range resolver.AllLocations(ctx) {
paths.Add(loc.RealPath) paths.Add(loc.RealPath)
switch loc.Annotations[file.VisibleAnnotationKey] {
case file.VisibleAnnotation:
visibleSet.Add(loc.RealPath)
case file.HiddenAnnotation:
hiddenSet.Add(loc.RealPath)
case "":
t.Errorf("expected visibility annotation for location: %+v", loc)
} }
expected := []string{ }
visible := []string{
"/Dockerfile", "/Dockerfile",
"/file-1.txt", "/file-3.txt", // this is a deadlink pointing to /file-1.txt (which has been deleted)
"/file-3.txt",
"/target", "/target",
"/target/file-2.txt", "/target/file-2.txt",
}
hidden := []string{
"/.wh.bin", "/.wh.bin",
"/.wh.file-1.txt", "/.wh.file-1.txt",
"/.wh.lib", "/.wh.lib",
@ -619,6 +629,7 @@ func TestAllLayersResolver_AllLocations(t *testing.T) {
"/bin/usleep", "/bin/usleep",
"/bin/watch", "/bin/watch",
"/bin/zcat", "/bin/zcat",
"/file-1.txt",
"/lib", "/lib",
"/lib/apk", "/lib/apk",
"/lib/apk/db", "/lib/apk/db",
@ -641,15 +652,41 @@ func TestAllLayersResolver_AllLocations(t *testing.T) {
"/lib/sysctl.d/00-alpine.conf", "/lib/sysctl.d/00-alpine.conf",
} }
var expected []string
expected = append(expected, visible...)
expected = append(expected, hidden...)
sort.Strings(expected)
cleanPaths := func(s *strset.Set) {
// depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image.
// this isn't important for the test, so we remove them. // this isn't important for the test, so we remove them.
paths.Remove("/proc", "/sys", "/dev", "/etc") s.Remove("/proc", "/sys", "/dev", "/etc")
// Remove cache created by Mac Rosetta when emulating different arches // Remove cache created by Mac Rosetta when emulating different arches
paths.Remove("/.cache/rosetta", "/.cache") s.Remove("/.cache/rosetta", "/.cache")
}
cleanPaths(paths)
cleanPaths(visibleSet)
cleanPaths(hiddenSet)
pathsList := paths.List() pathsList := paths.List()
sort.Strings(pathsList) sort.Strings(pathsList)
visibleSetList := visibleSet.List()
sort.Strings(visibleSetList)
hiddenSetList := hiddenSet.List()
sort.Strings(hiddenSetList)
if d := cmp.Diff(expected, pathsList); d != "" {
t.Errorf("unexpected paths (-want +got):\n%s", d)
}
if d := cmp.Diff(visible, visibleSetList); d != "" {
t.Errorf("unexpected visible paths (-want +got):\n%s", d)
}
if d := cmp.Diff(hidden, hiddenSetList); d != "" {
t.Errorf("unexpected hidden paths (-want +got):\n%s", d)
}
assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List()))
} }

View File

@ -31,6 +31,9 @@ func NewFromContainerImageDeepSquash(img *image.Image) (*ContainerImageDeepSquas
return nil, err return nil, err
} }
// we will do the work here to mark visibility with results from two resolvers (don't do the work twice!)
allLayers.markVisibility = false
return &ContainerImageDeepSquash{ return &ContainerImageDeepSquash{
squashed: squashed, squashed: squashed,
allLayers: allLayers, allLayers: allLayers,