syft/syft/pkg/cataloger/binary/cataloger.go

125 lines
3.7 KiB
Go

/*
Package binary provides a concrete Cataloger implementations for surfacing possible packages based on signatures found within binary files.
*/
package binary
import (
"encoding/json"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
const catalogerName = "binary-cataloger"
type CatalogerConfig struct {
Classifiers []Classifier `yaml:"classifiers" json:"classifiers" mapstructure:"classifiers"`
}
func DefaultCatalogerConfig() CatalogerConfig {
return CatalogerConfig{
Classifiers: DefaultClassifiers(),
}
}
func NewCataloger(cfg CatalogerConfig) pkg.Cataloger {
return &Cataloger{
classifiers: cfg.Classifiers,
}
}
func (cfg CatalogerConfig) MarshalJSON() ([]byte, error) {
// only keep the class names
var names []string
for _, cls := range cfg.Classifiers {
names = append(names, cls.Class)
}
return json.Marshal(names)
}
// Cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files,
// which have been identified by the classifiers. The Cataloger is _NOT_ a place to catalog any and every
// binary, but rather the specific set that has been curated to be important, predominantly related to toolchain-
// related runtimes like Python, Go, Java, or Node. Some exceptions can be made for widely-used binaries such
// as busybox.
type Cataloger struct {
classifiers []Classifier
}
// Name returns a string that uniquely describes the Cataloger
func (c Cataloger) Name() string {
return catalogerName
}
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages
// after analyzing the catalog source.
func (c Cataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
var packages []pkg.Package
var relationships []artifact.Relationship
for _, cls := range c.classifiers {
log.WithFields("classifier", cls.Class).Trace("cataloging binaries")
newPkgs, err := catalog(resolver, cls)
if err != nil {
log.WithFields("error", err, "classifier", cls.Class).Warn("unable to catalog binary package: %w", err)
continue
}
newPackages:
for i := range newPkgs {
newPkg := &newPkgs[i]
for j := range packages {
p := &packages[j]
// consolidate identical packages found in different locations or by different classifiers
if packagesMatch(p, newPkg) {
mergePackages(p, newPkg)
continue newPackages
}
}
packages = append(packages, *newPkg)
}
}
return packages, relationships, nil
}
// mergePackages merges information from the extra package into the target package
func mergePackages(target *pkg.Package, extra *pkg.Package) {
// add the locations
target.Locations.Add(extra.Locations.ToSlice()...)
// update the metadata to indicate which classifiers were used
meta, _ := target.Metadata.(pkg.BinarySignature)
if m, ok := extra.Metadata.(pkg.BinarySignature); ok {
meta.Matches = append(meta.Matches, m.Matches...)
}
target.Metadata = meta
}
func catalog(resolver file.Resolver, cls Classifier) (packages []pkg.Package, err error) {
locations, err := resolver.FilesByGlob(cls.FileGlob)
if err != nil {
return nil, err
}
for _, location := range locations {
pkgs, err := cls.EvidenceMatcher(resolver, cls, location)
if err != nil {
return nil, err
}
packages = append(packages, pkgs...)
}
return packages, nil
}
// packagesMatch returns true if the binary packages "match" based on basic criteria
func packagesMatch(p1 *pkg.Package, p2 *pkg.Package) bool {
if p1.Name != p2.Name ||
p1.Version != p2.Version ||
p1.Language != p2.Language ||
p1.Type != p2.Type {
return false
}
return true
}