syft/internal/relationship/by_file_ownership.go
Alex Goodman b0ab75fd89
Replace core SBOM-creation API with builder pattern (#1383)
* remove existing cataloging API

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add file cataloging config

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add package cataloging config

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add configs for cross-cutting concerns

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* rename CLI option configs to not require import aliases later

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update all nested structs for the Catalog struct

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update Catalog cli options

- add new cataloger selection options (selection and default)
- remove the excludeBinaryOverlapByOwnership
- deprecate "catalogers" flag
- add new javascript configuration

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* migrate relationship capabilities to separate internal package

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* refactor golang cataloger to use configuration options when creating packages

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* create internal object to facilitate reading from and writing to an SBOM

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* create a command-like object (task) to facilitate partial SBOM creation

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add cataloger selection capability

- be able to parse string expressions into a set of resolved actions against sets
- be able to use expressions to select/add/remove tasks to/from the final set of tasks to run

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add package, file, and environment related tasks

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update existing file catalogers to use nested UI elements

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add CreateSBOMConfig that drives the SBOM creation process

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* capture SBOM creation info as a struct

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add CreateSBOM() function

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix tests

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update docs with SBOM selection help + breaking changes

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix multiple override default inputs

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix deprecation flag printing to stdout

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* refactor cataloger selection description to separate object

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* address review comments

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* keep expression errors and show specific suggestions only

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* address additional review feedback

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* address more review comments

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* addressed additional PR review feedback

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix file selection references

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* remove guess language data generation option

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add tests for coordinatesForSelection

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* rename relationship attributes

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add descriptions to relationships config fields

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* improve documentation around configuration options

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add explicit errors around legacy config entries

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2024-01-12 17:39:13 -05:00

135 lines
4.1 KiB
Go

package relationship
import (
"sort"
"github.com/bmatcuk/doublestar/v4"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// altRpmDBGlob allows db matches against new locations introduced in fedora:{36,37}
// See https://github.com/anchore/syft/issues/1077 for larger context
const altRpmDBGlob = "**/rpm/{Packages,Packages.db,rpmdb.sqlite}"
var globsForbiddenFromBeingOwned = []string{
// any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the
// RPM DB, so if not ignored that package would own all other packages on the system).
pkg.ApkDBGlob,
pkg.DpkgDBGlob,
pkg.RpmDBGlob,
altRpmDBGlob,
// DEB packages share common copyright info between, this does not mean that sharing these paths implies ownership.
"/usr/share/doc/**/copyright",
}
type ownershipByFilesMetadata struct {
Files []string `json:"files"`
}
func byFileOwnershipOverlapWorker(accessor sbomsync.Accessor) {
var relationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
relationships = byFileOwnershipOverlap(s.Artifacts.Packages)
})
accessor.WriteToSBOM(func(s *sbom.SBOM) {
s.Relationships = append(s.Relationships, relationships...)
})
}
// byFileOwnershipOverlap creates a package-to-package relationship based on discovering which packages have
// evidence locations that overlap with ownership claim from another package's package manager metadata.
func byFileOwnershipOverlap(catalog *pkg.Collection) []artifact.Relationship {
var relationships = findOwnershipByFilesRelationships(catalog)
var edges []artifact.Relationship
for parentID, children := range relationships {
for childID, files := range children {
fs := files.List()
sort.Strings(fs)
parent := catalog.Package(parentID) // TODO: this is potentially expensive
child := catalog.Package(childID) // TODO: this is potentially expensive
edges = append(edges, artifact.Relationship{
From: parent,
To: child,
Type: artifact.OwnershipByFileOverlapRelationship,
Data: ownershipByFilesMetadata{
Files: fs,
},
})
}
}
return edges
}
// findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of
// a package is found to be owned by another (from the owner's .Metadata.Files[]).
func findOwnershipByFilesRelationships(catalog *pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set {
var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set)
if catalog == nil {
return relationships
}
for _, candidateOwnerPkg := range catalog.Sorted() {
id := candidateOwnerPkg.ID()
if candidateOwnerPkg.Metadata == nil {
continue
}
// check to see if this is a file owner
pkgFileOwner, ok := candidateOwnerPkg.Metadata.(pkg.FileOwner)
if !ok {
continue
}
for _, ownedFilePath := range pkgFileOwner.OwnedFiles() {
if matchesAny(ownedFilePath, globsForbiddenFromBeingOwned) {
// we skip over known exceptions to file ownership, such as the RPM package owning
// the RPM DB path, otherwise the RPM package would "own" all RPMs, which is not intended
continue
}
// look for package(s) in the catalog that may be owned by this package and mark the relationship
for _, subPackage := range catalog.PackagesByPath(ownedFilePath) {
subID := subPackage.ID()
if subID == id {
continue
}
if _, exists := relationships[id]; !exists {
relationships[id] = make(map[artifact.ID]*strset.Set)
}
if _, exists := relationships[id][subID]; !exists {
relationships[id][subID] = strset.New()
}
relationships[id][subID].Add(ownedFilePath)
}
}
}
return relationships
}
func matchesAny(s string, globs []string) bool {
for _, g := range globs {
matches, err := doublestar.Match(g, s)
if err != nil {
log.Errorf("failed to match glob=%q : %+v", g, err)
}
if matches {
return true
}
}
return false
}