Resolve owned file paths when searching for overlaps (#3828)

* resolve owned file paths when searching for overlaps

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* don't remove empty paths

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2025-04-24 17:59:45 -04:00 committed by GitHub
parent 4211d79667
commit 03fa142de9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 138 additions and 27 deletions

View File

@ -9,6 +9,7 @@ import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
@ -32,11 +33,11 @@ type ownershipByFilesMetadata struct {
Files []string `json:"files"`
}
func ByFileOwnershipOverlapWorker(accessor sbomsync.Accessor) {
func ByFileOwnershipOverlapWorker(resolver file.Resolver, accessor sbomsync.Accessor) {
var relationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
relationships = byFileOwnershipOverlap(s.Artifacts.Packages)
relationships = byFileOwnershipOverlap(resolver, s.Artifacts.Packages)
})
accessor.WriteToSBOM(func(s *sbom.SBOM) {
@ -46,8 +47,8 @@ func ByFileOwnershipOverlapWorker(accessor sbomsync.Accessor) {
// byFileOwnershipOverlap creates a package-to-package relationship based on discovering which packages have
// evidence locations that overlap with ownership claim from another package's package manager metadata.
func byFileOwnershipOverlap(catalog *pkg.Collection) []artifact.Relationship {
var relationships = findOwnershipByFilesRelationships(catalog)
func byFileOwnershipOverlap(resolver file.Resolver, catalog *pkg.Collection) []artifact.Relationship {
var relationships = findOwnershipByFilesRelationships(resolver, catalog)
var edges []artifact.Relationship
for parentID, children := range relationships {
@ -84,7 +85,7 @@ func byFileOwnershipOverlap(catalog *pkg.Collection) []artifact.Relationship {
// findOwnershipByFilesRelationships finds overlaps in file ownership with a file that defines another package. Specifically, it reports
// when a .Location.Path of one package is found to be owned by another package (from the owner's .Metadata.Files[]).
func findOwnershipByFilesRelationships(catalog *pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set {
func findOwnershipByFilesRelationships(resolver file.Resolver, catalog *pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set { //nolint:gocognit
var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set)
if catalog == nil {
@ -102,27 +103,31 @@ func findOwnershipByFilesRelationships(catalog *pkg.Collection) map[artifact.ID]
if !ok {
continue
}
for _, ownedFilePath := range pkgFileOwner.OwnedFiles() {
if matchesAny(ownedFilePath, globsForbiddenFromBeingOwned) {
// we skip over known exceptions to file ownership, such as the RPM package owning
// the RPM DB path, otherwise the RPM package would "own" all RPMs, which is not intended
continue
}
// look for package(s) in the catalog that may be owned by this package and mark the relationship
for _, subPackage := range catalog.PackagesByPath(ownedFilePath) {
subID := subPackage.ID()
if subID == id {
for _, ownedFilePath := range pkgFileOwner.OwnedFiles() {
// check every candidate path (the owned path as recorded, plus any real paths the resolver reports for it) for a hit
for _, ownedPath := range allPaths(ownedFilePath, resolver) {
if matchesAny(ownedPath, globsForbiddenFromBeingOwned) {
// we skip over known exceptions to file ownership, such as the RPM package owning
// the RPM DB path, otherwise the RPM package would "own" all RPMs, which is not intended
continue
}
if _, exists := relationships[id]; !exists {
relationships[id] = make(map[artifact.ID]*strset.Set)
}
if _, exists := relationships[id][subID]; !exists {
relationships[id][subID] = strset.New()
// look for package(s) in the catalog that may be owned by this package and mark the relationship
for _, subPackage := range catalog.PackagesByPath(ownedPath) {
subID := subPackage.ID()
if subID == id {
continue
}
if _, exists := relationships[id]; !exists {
relationships[id] = make(map[artifact.ID]*strset.Set)
}
if _, exists := relationships[id][subID]; !exists {
relationships[id][subID] = strset.New()
}
relationships[id][subID].Add(ownedPath)
}
relationships[id][subID].Add(ownedFilePath)
}
}
}
@ -130,6 +135,25 @@ func findOwnershipByFilesRelationships(catalog *pkg.Collection) map[artifact.ID]
return relationships
}
// allPaths returns the candidate paths to consider for an owned file: the path exactly as given,
// plus the RealPath of every location the resolver reports for that path. The given path may be a
// symlink (or otherwise not a real path), so the resolver is consulted to discover where it lands.
//
// A nil resolver, or a resolver error (which is logged at trace level and otherwise ignored), yields
// only the given path. Real paths are added as-is, including empty ones. Paths are collected in a
// string set, so a real path equal to the given path is not repeated; the order of the returned
// slice is whatever strset's List yields (presumably unordered -- callers should not rely on it).
func allPaths(ownedFilePath string, resolver file.Resolver) []string {
	candidates := strset.New(ownedFilePath)

	// without a resolver there is nothing to resolve against; the raw path is the only candidate
	if resolver == nil {
		return candidates.List()
	}

	resolved, err := resolver.FilesByPath(ownedFilePath)
	if err != nil {
		// best effort: a failed lookup must not prevent matching on the raw path
		log.WithFields("error", err, "path", ownedFilePath).Trace("unable to find path for owned file")
		return candidates.List()
	}

	for i := range resolved {
		candidates.Add(resolved[i].RealPath)
	}

	return candidates.List()
}
func matchesAny(s string, globs []string) bool {
for _, g := range globs {
matches, err := doublestar.Match(g, s)

View File

@ -4,7 +4,7 @@ import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal/cmptest"
"github.com/anchore/syft/syft/artifact"
@ -12,11 +12,29 @@ import (
"github.com/anchore/syft/syft/pkg"
)
// mockFR is a test double for file.Resolver. It embeds the interface so that it satisfies it, but
// only FilesByPath is implemented here; any other method falls through to the embedded Resolver
// (which the test cases visible in this file leave unset).
type mockFR struct {
	file.Resolver

	// translate maps a requested path to the path that should be reported back for it
	// (e.g. a symlink path to its real path). Paths without an entry are reported unchanged.
	translate map[string]string
}

// FilesByPath returns exactly one location per requested path, in request order. When a path has an
// entry in translate, the mapped value is used for the location (even if it is the empty string);
// otherwise the requested path itself is used. It never returns an error.
func (m mockFR) FilesByPath(paths ...string) ([]file.Location, error) {
	var locations []file.Location
	for _, requested := range paths {
		resolved := requested
		if mapped, found := m.translate[requested]; found {
			resolved = mapped
		}
		locations = append(locations, file.NewLocation(resolved))
	}
	return locations, nil
}
func TestOwnershipByFilesRelationship(t *testing.T) {
tests := []struct {
name string
setup func(t testing.TB) ([]pkg.Package, []artifact.Relationship)
name string
resolver file.Resolver
setup func(t testing.TB) ([]pkg.Package, []artifact.Relationship)
}{
{
name: "owns-by-real-path",
@ -60,6 +78,75 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
return []pkg.Package{parent, child}, []artifact.Relationship{relationship}
},
},
{
name: "misses-by-dead-symlink",
resolver: mockFR{
translate: map[string]string{
"/bin/gzip": "", // treat this as a dead symlink
},
},
setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
parent := pkg.Package{
Type: pkg.DebPkg,
Metadata: pkg.DpkgDBEntry{
Files: []pkg.DpkgFileRecord{
{Path: "/bin/gzip"}, // this symlinks to gzip via /bin -> /usr/bin
},
},
}
parent.SetID()
child := pkg.Package{
Locations: file.NewLocationSet(
file.NewVirtualLocation("/usr/bin/gzip", "/usr/bin/gzip"),
),
Type: pkg.BinaryPkg,
}
child.SetID()
return []pkg.Package{parent, child}, nil // importantly, no relationship is expected
},
},
{
name: "owns-by-symlink",
resolver: mockFR{
translate: map[string]string{
"/bin/gzip": "/usr/bin/gzip", // if there is a string path of /bin/gzip then return the real path of /usr/bin/gzip
},
},
setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
parent := pkg.Package{
Type: pkg.DebPkg,
Metadata: pkg.DpkgDBEntry{
Files: []pkg.DpkgFileRecord{
{Path: "/bin/gzip"}, // this symlinks to gzip via /bin -> /usr/bin
},
},
}
parent.SetID()
child := pkg.Package{
Locations: file.NewLocationSet(
file.NewVirtualLocation("/usr/bin/gzip", "/usr/bin/gzip"),
),
Type: pkg.BinaryPkg,
}
child.SetID()
relationship := artifact.Relationship{
From: parent,
To: child,
Type: artifact.OwnershipByFileOverlapRelationship,
Data: ownershipByFilesMetadata{
Files: []string{
"/usr/bin/gzip",
},
},
}
return []pkg.Package{parent, child}, []artifact.Relationship{relationship}
},
},
{
name: "owns-by-virtual-path",
setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
@ -140,9 +227,9 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
pkgs, expectedRelations := test.setup(t)
c := pkg.NewCollection(pkgs...)
relationships := byFileOwnershipOverlap(c)
relationships := byFileOwnershipOverlap(test.resolver, c)
assert.Len(t, relationships, len(expectedRelations))
require.Len(t, relationships, len(expectedRelations))
for idx, expectedRelationship := range expectedRelations {
actualRelationship := relationships[idx]
if d := cmp.Diff(expectedRelationship, actualRelationship, cmptest.DefaultOptions()...); d != "" {

View File

@ -46,7 +46,7 @@ func finalizeRelationships(resolver file.Resolver, builder sbomsync.Builder, cfg
// add relationships showing packages that are evident by a file which is owned by another package (package-to-package)
if cfg.PackageFileOwnershipOverlap {
relationship.ByFileOwnershipOverlapWorker(accessor)
relationship.ByFileOwnershipOverlapWorker(resolver, accessor)
}
// conditionally remove binary packages based on file ownership overlap relationships found