syft/internal/relationship/by_file_ownership.go
Dan Luhring bbf3bb5856
fix(relationship): favor real paths over symlinks for ownership by file (#3923)
Signed-off-by: Dan Luhring <dluhring@chainguard.dev>
Signed-off-by: Keith Zantow <kzantow@gmail.com>
Co-authored-by: Keith Zantow <kzantow@gmail.com>
2025-05-23 14:33:19 -04:00

226 lines
6.8 KiB
Go

package relationship
import (
"sort"
"github.com/bmatcuk/doublestar/v4"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// altRpmDBGlob matches RPM database files (Packages, Packages.db, rpmdb.sqlite) that live under any
// directory named "rpm". It allows db matches against new locations introduced in fedora:{36,37}.
// See https://github.com/anchore/syft/issues/1077 for larger context.
const altRpmDBGlob = "**/rpm/{Packages,Packages.db,rpmdb.sqlite}"
// globsForbiddenFromBeingOwned lists path globs that are never accepted as evidence of file ownership:
// a resolved path matching any of these globs is skipped when ownership relationships are computed.
var globsForbiddenFromBeingOwned = []string{
// any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the
// RPM DB, so if not ignored that package would own all other packages on the system).
pkg.ApkDBGlob,
pkg.DpkgDBGlob,
pkg.RpmDBGlob,
altRpmDBGlob,
// DEB packages share common copyright files with each other; sharing one of these paths does not imply
// ownership.
"/usr/share/doc/**/copyright",
}
// ownershipByFilesMetadata is the data payload attached to each ownership-by-file-overlap relationship.
type ownershipByFilesMetadata struct {
// Files holds the paths on which the relationship is based (populated with a sorted list by
// byFileOwnershipOverlap); it is serialized under the "files" JSON key.
Files []string `json:"files"`
}
// ByFileOwnershipOverlapWorker computes ownership-by-file-overlap relationships for the packages
// currently held in the SBOM and appends them to the SBOM's relationship list.
//
// The package collection is read inside accessor.ReadFromSBOM and the resulting relationships are
// appended in a separate accessor.WriteToSBOM call. The resolver is passed through to the
// relationship computation, where it is used to resolve owned file paths.
func ByFileOwnershipOverlapWorker(resolver file.Resolver, accessor sbomsync.Accessor) {
	var discovered []artifact.Relationship

	// compute the relationships from the current package collection
	collect := func(s *sbom.SBOM) {
		discovered = byFileOwnershipOverlap(resolver, s.Artifacts.Packages)
	}

	// record whatever was computed above on the SBOM
	publish := func(s *sbom.SBOM) {
		s.Relationships = append(s.Relationships, discovered...)
	}

	accessor.ReadFromSBOM(collect)
	accessor.WriteToSBOM(publish)
}
// byFileOwnershipOverlap builds package-to-package relationships of type
// artifact.OwnershipByFileOverlapRelationship. One relationship is emitted for every (owner, owned)
// package pair reported by findOwnershipByFilesRelationships, with the overlapping paths attached
// (sorted) as ownershipByFilesMetadata.
//
// Pairs whose owner or owned package can no longer be found in the catalog are logged at trace
// level and skipped. The result is nil when no relationships are found.
func byFileOwnershipOverlap(resolver file.Resolver, catalog *pkg.Collection) []artifact.Relationship {
	var result []artifact.Relationship

	for ownerID, ownedByID := range findOwnershipByFilesRelationships(resolver, catalog) {
		for ownedID, pathSet := range ownedByID {
			// the set has no defined order, so sort to keep the attached file list stable
			paths := pathSet.List()
			sort.Strings(paths)

			owner := catalog.Package(ownerID)  // TODO: this is potentially expensive
			owned := catalog.Package(ownedID) // TODO: this is potentially expensive

			switch {
			case owner == nil:
				log.Tracef("parent package not found: %v", ownerID)
				continue
			case owned == nil:
				log.Tracef("child package not found: %v", ownedID)
				continue
			}

			edge := artifact.Relationship{
				From: *owner,
				To:   *owned,
				Type: artifact.OwnershipByFileOverlapRelationship,
				Data: ownershipByFilesMetadata{Files: paths},
			}
			result = append(result, edge)
		}
	}

	return result
}
// findOwnershipByFilesRelationships finds packages whose catalog paths are claimed as owned files by
// another package (via the owner's pkg.FileOwner metadata).
//
// The result maps owner package ID -> owned package ID -> set of resolved paths that establish the
// ownership. A nil catalog yields an empty (non-nil) map. Owned file paths are expanded with
// resolvePaths, so a claim may also apply to the real path behind a symlink. Two kinds of resolved
// paths are ignored:
//   - paths matching globsForbiddenFromBeingOwned;
//   - paths that differ from the claimed path (i.e. reached through resolution) when some package of
//     the same type as the candidate owner lists that resolved path directly in its owned files.
func findOwnershipByFilesRelationships(resolver file.Resolver, catalog *pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set { //nolint:gocognit
	ownership := map[artifact.ID]map[artifact.ID]*strset.Set{}
	if catalog == nil {
		return ownership
	}

	// Per package type, the set of paths that packages list directly as owned files. This is
	// consulted below so that a claim made only through a symlink does not compete with a direct
	// claim on the real path by a package of the same type.
	directlyOwned := directOwnersByPath(catalog)

	for _, owner := range catalog.Sorted() {
		ownerID := owner.ID()

		if owner.Metadata == nil {
			continue
		}

		// only packages whose metadata declares owned files can be owners
		fileOwner, isFileOwner := owner.Metadata.(pkg.FileOwner)
		if !isFileOwner {
			continue
		}

		for _, claimedPath := range fileOwner.OwnedFiles() {
			if claimedPath == "" {
				continue
			}

			// the claimed path itself plus any real paths it resolves to
			for _, actualPath := range resolvePaths(claimedPath, resolver) {
				// known exceptions to file ownership, such as the RPM package owning the RPM DB path;
				// otherwise the RPM package would "own" all RPMs, which is not intended
				if matchesAny(actualPath, globsForbiddenFromBeingOwned) {
					continue
				}

				// actualPath was reached by resolution rather than being claimed verbatim
				if actualPath != claimedPath {
					sameTypePaths := directlyOwned[owner.Type]
					if sameTypePaths != nil && sameTypePaths.Has(actualPath) {
						// a package of the same type directly owns the real path; do not claim it here
						continue
					}
				}

				// every other package located at this path is considered owned by the candidate
				for _, other := range catalog.PackagesByPath(actualPath) {
					otherID := other.ID()
					if otherID == ownerID {
						continue
					}

					byOwned, seen := ownership[ownerID]
					if !seen {
						byOwned = make(map[artifact.ID]*strset.Set)
						ownership[ownerID] = byOwned
					}

					evidence, seen := byOwned[otherID]
					if !seen {
						evidence = strset.New()
						byOwned[otherID] = evidence
					}

					evidence.Add(actualPath)
				}
			}
		}
	}

	return ownership
}
// directOwnersByPath collects, per package type, every non-empty path that a package of that type
// lists in its owned files (as reported by its pkg.FileOwner metadata). Paths are recorded exactly
// as listed; no resolution is performed here. Package types with no such paths have no entry in
// the returned map.
func directOwnersByPath(catalog *pkg.Collection) map[pkg.Type]*strset.Set {
	ownedByType := make(map[pkg.Type]*strset.Set)

	for _, candidate := range catalog.Sorted() {
		if candidate.Metadata == nil {
			continue
		}

		// packages whose metadata does not declare owned files contribute nothing
		fileOwner, isFileOwner := candidate.Metadata.(pkg.FileOwner)
		if !isFileOwner {
			continue
		}

		for _, listedPath := range fileOwner.OwnedFiles() {
			if listedPath == "" {
				continue
			}

			// create the per-type set lazily, on the first path seen for that type
			typePaths, found := ownedByType[candidate.Type]
			if !found {
				typePaths = strset.New()
				ownedByType[candidate.Type] = typePaths
			}

			typePaths.Add(listedPath)
		}
	}

	return ownedByType
}
// resolvePaths returns the de-duplicated set of paths that ownedFilePath may refer to: the path as
// given, plus the RealPath of every location the resolver reports for it. The given path cannot be
// assumed to be a real path, so symlinks and other filesystem oddities are resolved this way.
//
// When resolver is nil, or the lookup fails (the error is logged at trace level), only
// ownedFilePath itself is returned. The order of the returned slice is whatever the underlying set
// yields.
func resolvePaths(ownedFilePath string, resolver file.Resolver) []string {
	candidates := strset.New(ownedFilePath)

	if resolver == nil {
		return candidates.List()
	}

	locations, err := resolver.FilesByPath(ownedFilePath)
	if err != nil {
		log.WithFields("error", err, "path", ownedFilePath).Trace("unable to find path for owned file")
		return candidates.List()
	}

	for _, location := range locations {
		candidates.Add(location.RealPath)
	}

	return candidates.List()
}
// matchesAny reports whether s matches at least one of the given doublestar glob patterns.
// A pattern that fails to evaluate is logged as an error and does not stop the remaining
// patterns from being checked.
func matchesAny(s string, globs []string) bool {
	for i := range globs {
		pattern := globs[i]

		matched, err := doublestar.Match(pattern, s)
		if err != nil {
			log.Errorf("failed to match glob=%q : %+v", pattern, err)
		}

		if matched {
			return true
		}
	}

	return false
}