From 78ad3d648f2483f0eaaa3e726b925712e73d571c Mon Sep 17 00:00:00 2001
From: Alex Goodman
Date: Wed, 13 Mar 2024 18:09:19 -0400
Subject: [PATCH] [wip] prototype binary relationships

Signed-off-by: Alex Goodman
---
 .../binary/binary_dependencies.go             | 100 +++++++++++++
 .../relationship/binary/relationship_index.go |  61 ++++++++
 .../binary/shared_library_index.go            | 138 ++++++++++++++++++
 internal/relationship/finalize.go             |  13 +-
 internal/task/relationship_tasks.go           |   3 +-
 5 files changed, 313 insertions(+), 2 deletions(-)
 create mode 100644 internal/relationship/binary/binary_dependencies.go
 create mode 100644 internal/relationship/binary/relationship_index.go
 create mode 100644 internal/relationship/binary/shared_library_index.go

diff --git a/internal/relationship/binary/binary_dependencies.go b/internal/relationship/binary/binary_dependencies.go
new file mode 100644
index 000000000..554ed6c31
--- /dev/null
+++ b/internal/relationship/binary/binary_dependencies.go
@@ -0,0 +1,100 @@
+package binary
+
+import (
+	"path"
+
+	"github.com/anchore/syft/internal/sbomsync"
+	"github.com/anchore/syft/syft/artifact"
+	"github.com/anchore/syft/syft/file"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/sbom"
+)
+
+// NewDependencyRelationships returns the dependency-of relationships implied by the shared libraries imported by
+// the executables that are primary evidence for binary packages. The package(s) owning a library are preferred as
+// the dependency, falling back to the library file itself; relationships already in the SBOM are not returned.
+func NewDependencyRelationships(resolver file.Resolver, accessor sbomsync.Accessor) []artifact.Relationship {
+	// TODO: consider library format (e.g. ELF, Mach-O, PE) for the meantime assume all binaries are homogeneous format
+
+	// start with building new file-to-file relationships for executables-to-executables
+	// you need to make certain that they are unique, store in a map[id]map[id]relationship to avoid dupes.
+	// before creating the new file-to-file relationship, check to see if there are packages that represent each
+	// file. If there are, create a package-to-package, file-to-package, or package-to-file relationship as appropriate.
+
+	// 1 & 2... build an index of all shared libraries and their owning packages to search against
+	index := newShareLibIndex(resolver, accessor)
+
+	// 3. craft package-to-package or package-to-file relationships that represent binary shared library dependencies
+	// note: prefer package-to-package relationships over package-to-file relationships
+
+	relIndex := newRelationshipIndex()
+	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
+		// read all existing dependencyOf relationships
+		for _, r := range s.Relationships {
+			if r.Type != artifact.DependencyOfRelationship {
+				continue
+			}
+			relIndex.track(r)
+		}
+	})
+
+	// find all new relationships to add...
+	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
+		for _, parentPkg := range s.Artifacts.Packages.Sorted(pkg.BinaryPkg) {
+			for _, evidentLocation := range parentPkg.Locations.ToSlice() {
+				if evidentLocation.Annotations[pkg.EvidenceAnnotationKey] != pkg.PrimaryEvidenceAnnotation {
+					continue
+				}
+
+				// find all libraries that this package depends on
+				exec, ok := s.Artifacts.Executables[evidentLocation.Coordinates]
+				if !ok {
+					continue
+				}
+
+				for _, libReference := range exec.ImportedLibraries {
+					// TODO: is this always a basename? technically no, it could be a path...
+					libBasename := path.Base(libReference)
+
+					pkgsThatOwnLib := index.owningLibraryPackage(libBasename)
+					if pkgsThatOwnLib == nil {
+						// create package-to-file relationship...
+						// if there is more than one library for this given library name, then we will include
+						// all of them as dependencies since we don't know the LD_LIBRARY_PATH order
+						// TODO: add configuration for LD_LIBRARY_PATH order?
+						for _, libCoord := range index.owningLibraryLocations(libBasename).ToSlice() {
+							relIndex.add(
+								artifact.Relationship{
+									From: libCoord,
+									To:   parentPkg,
+									Type: artifact.DependencyOfRelationship,
+								},
+							)
+						}
+
+						// don't create a package-to-package relationship for this library... since we can't
+						continue
+					}
+
+					// create a package-to-package relationship between the binary package and the library package
+					// if there is more than one library for this given library name, then we will include
+					// all of them as dependencies since we don't know the LD_LIBRARY_PATH order
+					for _, pkgThatOwnsLib := range pkgsThatOwnLib.Sorted() {
+						relIndex.add(
+							artifact.Relationship{
+								From: pkgThatOwnsLib,
+								To:   parentPkg,
+								Type: artifact.DependencyOfRelationship,
+							},
+						)
+					}
+				}
+			}
+		}
+	})
+
+	// so far this handles the first order dependencies from the binary package. Odds are that the OS package manager
+	// will have already created a package-to-package relationship for the lib packages to other lib packages.
+
+	return relIndex.newRelationships()
+}
diff --git a/internal/relationship/binary/relationship_index.go b/internal/relationship/binary/relationship_index.go
new file mode 100644
index 000000000..487d59e8e
--- /dev/null
+++ b/internal/relationship/binary/relationship_index.go
@@ -0,0 +1,61 @@
+package binary
+
+import (
+	"github.com/scylladb/go-set/strset"
+
+	"github.com/anchore/syft/syft/artifact"
+)
+
+// relationshipIndex remembers which relationships (keyed by from-ID, to-ID, and type) have been seen, and collects
+// the relationships that were new to the index when they were passed to add.
+type relationshipIndex struct {
+	typesByFromTo map[artifact.ID]map[artifact.ID]*strset.Set
+	additional    []artifact.Relationship
+}
+
+// newRelationshipIndex creates an index that already tracks the given existing relationships.
+func newRelationshipIndex(existing ...artifact.Relationship) *relationshipIndex {
+	r := &relationshipIndex{
+		typesByFromTo: make(map[artifact.ID]map[artifact.ID]*strset.Set),
+		additional:    make([]artifact.Relationship, 0),
+	}
+	for _, rel := range existing {
+		r.track(rel)
+	}
+	return r
+}
+
+// track this relationship as "exists" in the index (this is used to prevent duplicate relationships from being added).
+// returns true if the relationship is new to the index, false otherwise.
+func (i *relationshipIndex) track(r artifact.Relationship) bool {
+	fromID := r.From.ID()
+	if _, ok := i.typesByFromTo[fromID]; !ok {
+		i.typesByFromTo[fromID] = make(map[artifact.ID]*strset.Set)
+	}
+
+	toID := r.To.ID()
+	if _, ok := i.typesByFromTo[fromID][toID]; !ok {
+		i.typesByFromTo[fromID][toID] = strset.New()
+	}
+
+	// check membership before recording the type so that a repeated relationship is reported as not new
+	types := i.typesByFromTo[fromID][toID]
+	isNew := !types.Has(string(r.Type))
+	types.Add(string(r.Type))
+
+	return isNew
+}
+
+// add a new relationship to the index, returning true if the relationship is new to the index, false otherwise (thus is a duplicate).
+func (i *relationshipIndex) add(r artifact.Relationship) bool {
+	if i.track(r) {
+		i.additional = append(i.additional, r)
+		return true
+	}
+	return false
+}
+
+// newRelationships returns the relationships that were accepted by add, in the order they were added.
+func (i *relationshipIndex) newRelationships() []artifact.Relationship {
+	return i.additional
+}
diff --git a/internal/relationship/binary/shared_library_index.go b/internal/relationship/binary/shared_library_index.go
new file mode 100644
index 000000000..478cc815d
--- /dev/null
+++ b/internal/relationship/binary/shared_library_index.go
@@ -0,0 +1,138 @@
+package binary
+
+import (
+	"path"
+
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/internal/sbomsync"
+	"github.com/anchore/syft/syft/file"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/sbom"
+)
+
+// sharedLibraryIndex records which locations provide libraries (by basename) and which packages own those locations.
+type sharedLibraryIndex struct {
+	libLocationsByBasename map[string]file.CoordinateSet
+	allLibLocations        file.CoordinateSet
+	packagesByLibraryPath  map[file.Coordinates]*pkg.Collection
+}
+
+// newShareLibIndex creates a sharedLibraryIndex and populates it via build.
+func newShareLibIndex(resolver file.Resolver, accessor sbomsync.Accessor) *sharedLibraryIndex {
+	s := &sharedLibraryIndex{
+		libLocationsByBasename: make(map[string]file.CoordinateSet),
+		allLibLocations:        file.NewCoordinateSet(),
+		packagesByLibraryPath:  make(map[file.Coordinates]*pkg.Collection),
+	}
+
+	s.build(resolver, accessor)
+
+	return s
+}
+
+// build populates the lookup tables of the index, replacing whatever they held before.
+func (i *sharedLibraryIndex) build(resolver file.Resolver, accessor sbomsync.Accessor) {
+	// 1. map out all locations that provide libraries (indexed by the basename)
+	i.libLocationsByBasename, i.allLibLocations = locationsThatProvideLibraries(accessor)
+
+	// 2. for each library path, find all packages that claim ownership of the library
+	i.packagesByLibraryPath = packagesWithLibraryOwnership(resolver, accessor, i.allLibLocations)
+}
+
+// owningLibraryLocations returns every location that provides a library with the given basename (empty if none).
+func (i *sharedLibraryIndex) owningLibraryLocations(libraryBasename string) file.CoordinateSet {
+	if set, ok := i.libLocationsByBasename[libraryBasename]; ok {
+		return set
+	}
+
+	return file.NewCoordinateSet()
+}
+
+// owningLibraryPackage returns all packages that own a library with the given basename, across every location that
+// provides it, or nil when no package claims ownership of any of those locations.
+func (i *sharedLibraryIndex) owningLibraryPackage(libraryBasename string) *pkg.Collection {
+	set, ok := i.libLocationsByBasename[libraryBasename]
+	if !ok {
+		return nil
+	}
+
+	// there may be several libraries with this basename; gather the owners of all of them rather than stopping at
+	// the first location that has an owner.
+	var owners *pkg.Collection
+	for _, coord := range set.ToSlice() {
+		pkgSet, owned := i.packagesByLibraryPath[coord]
+		if !owned {
+			continue
+		}
+
+		if owners == nil {
+			owners = pkg.NewCollection()
+		}
+		for _, p := range pkgSet.Sorted() {
+			owners.Add(p)
+		}
+	}
+
+	return owners
+}
+
+// locationsThatProvideLibraries maps out all locations that provide libraries, returning them both indexed by
+// basename and as a single flat set. Only executables flagged with HasExports are considered.
+func locationsThatProvideLibraries(accessor sbomsync.Accessor) (map[string]file.CoordinateSet, file.CoordinateSet) {
+	libLocationsByBasename := make(map[string]file.CoordinateSet)
+	allLibLocations := file.NewCoordinateSet()
+
+	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
+		for coord, f := range s.Artifacts.Executables {
+			if !f.HasExports {
+				continue
+			}
+
+			basename := path.Base(coord.RealPath)
+			set := libLocationsByBasename[basename]
+			set.Add(coord)
+			allLibLocations.Add(coord)
+			libLocationsByBasename[basename] = set
+		}
+	})
+
+	return libLocationsByBasename, allLibLocations
+}
+
+// packagesWithLibraryOwnership maps out all packages that claim ownership of a library at a specific path.
+func packagesWithLibraryOwnership(resolver file.Resolver, accessor sbomsync.Accessor, allLibLocations file.CoordinateSet) map[file.Coordinates]*pkg.Collection {
+	packagesByLibraryPath := make(map[file.Coordinates]*pkg.Collection)
+
+	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
+		for _, p := range s.Artifacts.Packages.Sorted() {
+			fileOwner, ok := p.Metadata.(pkg.FileOwner)
+			if !ok {
+				continue
+			}
+
+			for _, pth := range fileOwner.OwnedFiles() {
+				ownedLocation, err := resolver.FilesByPath(pth)
+				if err != nil {
+					log.WithFields("error", err, "path", pth).Trace("unable to find path for owned file")
+					continue
+				}
+
+				for _, loc := range ownedLocation {
+					// if the location is a library, add the package to the set of packages that own the library
+					if !allLibLocations.Contains(loc.Coordinates) {
+						continue
+					}
+
+					if _, ok := packagesByLibraryPath[loc.Coordinates]; !ok {
+						packagesByLibraryPath[loc.Coordinates] = pkg.NewCollection()
+					}
+
+					// we have a library path, add the package to the set of packages that own the library
+					packagesByLibraryPath[loc.Coordinates].Add(p)
+				}
+			}
+		}
+	})
+
+	return packagesByLibraryPath
+}
diff --git a/internal/relationship/finalize.go b/internal/relationship/finalize.go
index bfe6bd66f..39f423356 100644
--- a/internal/relationship/finalize.go
+++ b/internal/relationship/finalize.go
@@ -1,15 +1,20 @@
 package relationship
 
 import (
+	"github.com/anchore/syft/internal/relationship/binary"
 	"github.com/anchore/syft/internal/sbomsync"
 	"github.com/anchore/syft/syft/artifact"
 	"github.com/anchore/syft/syft/cataloging"
+	"github.com/anchore/syft/syft/file"
 	"github.com/anchore/syft/syft/sbom"
 )
 
-func Finalize(builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) {
+func Finalize(resolver file.Resolver, builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) {
 	accessor := builder.(sbomsync.Accessor)
 
+	// remove ELF packages that are already represented by a non-ELF package
+	// TODO (also, how should we update the TUI to reflect that we removed packages?)
+
 	// add relationships showing packages that are evident by a file which is owned by another package (package-to-package)
 	if cfg.PackageFileOwnershipOverlap {
 		byFileOwnershipOverlapWorker(accessor)
@@ -21,6 +26,12 @@ func Finalize(builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src
 		excludeBinariesByFileOwnershipOverlap(accessor)
 	}
 
+	// add the new relationships for executables to the SBOM
+	newBinaryRelationships := binary.NewDependencyRelationships(resolver, accessor)
+	accessor.WriteToSBOM(func(s *sbom.SBOM) {
+		s.Relationships = append(s.Relationships, newBinaryRelationships...)
+	})
+
 	// add source "contains package" relationship (source-to-package)
 	var sourceRelationships []artifact.Relationship
 	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
diff --git a/internal/task/relationship_tasks.go b/internal/task/relationship_tasks.go
index 2732d04e8..4b23730bc 100644
--- a/internal/task/relationship_tasks.go
+++ b/internal/task/relationship_tasks.go
@@ -22,8 +22,9 @@ func (s sourceIdentifierAdapter) ID() artifact.ID {
 }
 
 func NewRelationshipsTask(cfg cataloging.RelationshipsConfig, src source.Description) Task {
-	fn := func(_ context.Context, _ file.Resolver, builder sbomsync.Builder) error {
+	fn := func(_ context.Context, resolver file.Resolver, builder sbomsync.Builder) error {
 		relationship.Finalize(
+			resolver,
 			builder,
 			cfg,
 			&sourceIdentifierAdapter{desc: src})