hoist up package relationship discovery to analysis stage

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-11-10 11:05:01 -05:00
parent b519340b86
commit cb0278f3bc
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
20 changed files with 60 additions and 61 deletions

View File

@ -1,7 +1,7 @@
{ {
"artifacts": [ "artifacts": [
{ {
"id": "14696638697550896878", "id": "cbf4f3077fc7deee",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -36,7 +36,7 @@
} }
}, },
{ {
"id": "1889729387356865209", "id": "1a39aadd9705c2b9",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",

View File

@ -1,7 +1,7 @@
{ {
"artifacts": [ "artifacts": [
{ {
"id": "15119766234833480967", "id": "d1d433485a31ed07",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -32,7 +32,7 @@
} }
}, },
{ {
"id": "3293866126252599174", "id": "2db629ca48fa6786",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",

View File

@ -25,7 +25,7 @@ func ToFormatModel(s sbom.SBOM, applicationConfig interface{}) model.Document {
return model.Document{ return model.Document{
Artifacts: toPackageModels(s.Artifacts.PackageCatalog), Artifacts: toPackageModels(s.Artifacts.PackageCatalog),
ArtifactRelationships: toRelationshipModel(pkg.NewRelationships(s.Artifacts.PackageCatalog)), ArtifactRelationships: toRelationshipModel(s.Relationships),
Source: src, Source: src,
Distro: toDistroModel(s.Artifacts.Distro), Distro: toDistroModel(s.Artifacts.Distro),
Descriptor: model.Descriptor{ Descriptor: model.Descriptor{

View File

@ -72,7 +72,7 @@
], ],
"artifacts": [ "artifacts": [
{ {
"id": "13280550215267739407", "id": "b84dfe0eb2c5670f",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -102,7 +102,7 @@
} }
}, },
{ {
"id": "7356949319602771519", "id": "6619226d6979963f",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",

View File

@ -13,7 +13,7 @@ type Identifiable interface {
ID() ID ID() ID
} }
func DeriveID(obj interface{}) (ID, error) { func IDFromHash(obj interface{}) (ID, error) {
f, err := hashstructure.Hash(obj, hashstructure.FormatV2, &hashstructure.HashOptions{ f, err := hashstructure.Hash(obj, hashstructure.FormatV2, &hashstructure.HashOptions{
ZeroNil: true, ZeroNil: true,
SlicesAsSets: true, SlicesAsSets: true,
@ -22,5 +22,5 @@ func DeriveID(obj interface{}) (ID, error) {
return "", fmt.Errorf("could not build ID for object=%+v: %+v", obj, err) return "", fmt.Errorf("could not build ID for object=%+v: %+v", obj, err)
} }
return ID(fmt.Sprint(f)), nil return ID(fmt.Sprintf("%x", f)), nil
} }

View File

@ -74,6 +74,8 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers
allRelationships = append(allRelationships, relationships...) allRelationships = append(allRelationships, relationships...)
} }
allRelationships = append(allRelationships, pkg.NewRelationships(catalog)...)
if errs != nil { if errs != nil {
return nil, nil, errs return nil, nil, errs
} }

View File

@ -28,7 +28,7 @@ type Package struct {
} }
func (p Package) ID() artifact.ID { func (p Package) ID() artifact.ID {
f, err := artifact.DeriveID(p) f, err := artifact.IDFromHash(p)
if err != nil { if err != nil {
// TODO: what to do in this case? // TODO: what to do in this case?
log.Warnf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err) log.Warnf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err)

View File

@ -189,10 +189,10 @@ func TestFingerprint(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
transformedPkg := test.transform(originalPkg) transformedPkg := test.transform(originalPkg)
originalFingerprint, err := originalPkg.Fingerprint() originalFingerprint := originalPkg.ID()
assert.NoError(t, err, "expected no error on package fingerprint") assert.NotEmpty(t, originalFingerprint)
transformedFingerprint, err := transformedPkg.Fingerprint() transformedFingerprint := transformedPkg.ID()
assert.NoError(t, err, "expected no error on package fingerprint") assert.NotEmpty(t, transformedFingerprint)
if test.expectIdentical { if test.expectIdentical {
assert.Equal(t, originalFingerprint, transformedFingerprint) assert.Equal(t, originalFingerprint, transformedFingerprint)

View File

@ -4,5 +4,5 @@ import "github.com/anchore/syft/syft/artifact"
// TODO: as more relationships are added, this function signature will probably accommodate selection // TODO: as more relationships are added, this function signature will probably accommodate selection
func NewRelationships(catalog *Catalog) []artifact.Relationship { func NewRelationships(catalog *Catalog) []artifact.Relationship {
return ownershipByFilesRelationships(catalog) return RelationshipsByFileOwnership(catalog)
} }

View File

@ -21,7 +21,9 @@ type ownershipByFilesMetadata struct {
Files []string `json:"files"` Files []string `json:"files"`
} }
func ownershipByFilesRelationships(catalog *Catalog) []artifact.Relationship { // RelationshipsByFileOwnership creates a package-to-package relationship based on discovering which packages have
// evidence locations that overlap with an ownership claim from another package's package manager metadata.
func RelationshipsByFileOwnership(catalog *Catalog) []artifact.Relationship {
var relationships = findOwnershipByFilesRelationships(catalog) var relationships = findOwnershipByFilesRelationships(catalog)
var edges []artifact.Relationship var edges []artifact.Relationship

View File

@ -167,7 +167,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
pkgs, expectedRelations := test.setup(t) pkgs, expectedRelations := test.setup(t)
c := NewCatalog(pkgs...) c := NewCatalog(pkgs...)
relationships := ownershipByFilesRelationships(c) relationships := RelationshipsByFileOwnership(c)
assert.Len(t, relationships, len(expectedRelations)) assert.Len(t, relationships, len(expectedRelations))
for idx, expectedRelationship := range expectedRelations { for idx, expectedRelationship := range expectedRelations {

View File

@ -75,7 +75,7 @@ func (l Location) String() string {
} }
func (l Location) ID() artifact.ID { func (l Location) ID() artifact.ID {
f, err := artifact.DeriveID(l) f, err := artifact.IDFromHash(l)
if err != nil { if err != nil {
// TODO: what to do in this case? // TODO: what to do in this case?
log.Warnf("unable to get fingerprint of location=%+v: %+v", l, err) log.Warnf("unable to get fingerprint of location=%+v: %+v", l, err)

View File

@ -49,7 +49,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
} }
func TestPkgCoverageImage(t *testing.T) { func TestPkgCoverageImage(t *testing.T) {
catalog, _, _, _ := catalogFixtureImage(t, "image-pkg-coverage") sbom, _ := catalogFixtureImage(t, "image-pkg-coverage")
observedLanguages := internal.NewStringSet() observedLanguages := internal.NewStringSet()
definedLanguages := internal.NewStringSet() definedLanguages := internal.NewStringSet()
@ -80,7 +80,7 @@ func TestPkgCoverageImage(t *testing.T) {
t.Run(c.name, func(t *testing.T) { t.Run(c.name, func(t *testing.T) {
pkgCount := 0 pkgCount := 0
for a := range catalog.Enumerate(c.pkgType) { for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) {
if a.Language.String() != "" { if a.Language.String() != "" {
observedLanguages.Add(a.Language.String()) observedLanguages.Add(a.Language.String())
@ -108,7 +108,7 @@ func TestPkgCoverageImage(t *testing.T) {
if pkgCount != len(c.pkgInfo)+c.duplicates { if pkgCount != len(c.pkgInfo)+c.duplicates {
t.Logf("Discovered packages of type %+v", c.pkgType) t.Logf("Discovered packages of type %+v", c.pkgType)
for a := range catalog.Enumerate(c.pkgType) { for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) {
t.Log(" ", a) t.Log(" ", a)
} }
t.Fatalf("unexpected package count: %d!=%d", pkgCount, len(c.pkgInfo)) t.Fatalf("unexpected package count: %d!=%d", pkgCount, len(c.pkgInfo))
@ -135,7 +135,7 @@ func TestPkgCoverageImage(t *testing.T) {
} }
func TestPkgCoverageDirectory(t *testing.T) { func TestPkgCoverageDirectory(t *testing.T) {
catalog, _, _, _ := catalogDirectory(t, "test-fixtures/image-pkg-coverage") sbom, _ := catalogDirectory(t, "test-fixtures/image-pkg-coverage")
observedLanguages := internal.NewStringSet() observedLanguages := internal.NewStringSet()
definedLanguages := internal.NewStringSet() definedLanguages := internal.NewStringSet()
@ -157,7 +157,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
actualPkgCount := 0 actualPkgCount := 0
for actualPkg := range catalog.Enumerate(test.pkgType) { for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) {
observedLanguages.Add(actualPkg.Language.String()) observedLanguages.Add(actualPkg.Language.String())
observedPkgs.Add(string(actualPkg.Type)) observedPkgs.Add(string(actualPkg.Type))
@ -182,7 +182,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
} }
if actualPkgCount != len(test.pkgInfo)+test.duplicates { if actualPkgCount != len(test.pkgInfo)+test.duplicates {
for actualPkg := range catalog.Enumerate(test.pkgType) { for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) {
t.Log(" ", actualPkg) t.Log(" ", actualPkg)
} }
t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo)) t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo))

View File

@ -8,14 +8,14 @@ import (
) )
func TestDistroImage(t *testing.T) { func TestDistroImage(t *testing.T) {
_, _, actualDistro, _ := catalogFixtureImage(t, "image-distro-id") sbom, _ := catalogFixtureImage(t, "image-distro-id")
expected, err := distro.NewDistro(distro.Busybox, "1.31.1", "") expected, err := distro.NewDistro(distro.Busybox, "1.31.1", "")
if err != nil { if err != nil {
t.Fatalf("could not create distro: %+v", err) t.Fatalf("could not create distro: %+v", err)
} }
for _, d := range deep.Equal(actualDistro, &expected) { for _, d := range deep.Equal(sbom.Artifacts.Distro, &expected) {
t.Errorf("found distro difference: %+v", d) t.Errorf("found distro difference: %+v", d)
} }

View File

@ -8,8 +8,6 @@ import (
"github.com/sergi/go-diff/diffmatchpatch" "github.com/sergi/go-diff/diffmatchpatch"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/format" "github.com/anchore/syft/syft/format"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -31,15 +29,7 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(string(test.format), func(t *testing.T) { t.Run(string(test.format), func(t *testing.T) {
originalCatalog, _, d, src := catalogFixtureImage(t, "image-pkg-coverage") originalSBOM, _ := catalogFixtureImage(t, "image-pkg-coverage")
originalSBOM := sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: originalCatalog,
Distro: d,
},
Source: src.Metadata,
}
by1, err := syft.Encode(originalSBOM, test.format) by1, err := syft.Encode(originalSBOM, test.format)
assert.NoError(t, err) assert.NoError(t, err)

View File

@ -9,11 +9,11 @@ import (
) )
func TestNpmPackageLockDirectory(t *testing.T) { func TestNpmPackageLockDirectory(t *testing.T) {
catalog, _, _, _ := catalogDirectory(t, "test-fixtures/npm-lock") sbom, _ := catalogDirectory(t, "test-fixtures/npm-lock")
foundPackages := internal.NewStringSet() foundPackages := internal.NewStringSet()
for actualPkg := range catalog.Enumerate(pkg.NpmPkg) { for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations { for _, actualLocation := range actualPkg.Locations {
if strings.Contains(actualLocation.RealPath, "node_modules") { if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation) t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation)
@ -30,11 +30,11 @@ func TestNpmPackageLockDirectory(t *testing.T) {
} }
func TestYarnPackageLockDirectory(t *testing.T) { func TestYarnPackageLockDirectory(t *testing.T) {
catalog, _, _, _ := catalogDirectory(t, "test-fixtures/yarn-lock") sbom, _ := catalogDirectory(t, "test-fixtures/yarn-lock")
foundPackages := internal.NewStringSet() foundPackages := internal.NewStringSet()
for actualPkg := range catalog.Enumerate(pkg.NpmPkg) { for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations { for _, actualLocation := range actualPkg.Locations {
if strings.Contains(actualLocation.RealPath, "node_modules") { if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation) t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation)

View File

@ -7,7 +7,6 @@ import (
"github.com/anchore/syft/internal/formats/syftjson" "github.com/anchore/syft/internal/formats/syftjson"
syftjsonModel "github.com/anchore/syft/internal/formats/syftjson/model" syftjsonModel "github.com/anchore/syft/internal/formats/syftjson/model"
"github.com/anchore/syft/syft/sbom"
) )
func TestPackageOwnershipRelationships(t *testing.T) { func TestPackageOwnershipRelationships(t *testing.T) {
@ -23,15 +22,9 @@ func TestPackageOwnershipRelationships(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.fixture, func(t *testing.T) { t.Run(test.fixture, func(t *testing.T) {
catalog, _, d, src := catalogFixtureImage(t, test.fixture) sbom, _ := catalogFixtureImage(t, test.fixture)
p := syftjson.Format().Presenter(sbom.SBOM{ p := syftjson.Format().Presenter(sbom)
Artifacts: sbom.Artifacts{
PackageCatalog: catalog,
Distro: d,
},
Source: src.Metadata,
})
if p == nil { if p == nil {
t.Fatal("unable to get presenter") t.Fatal("unable to get presenter")
} }

View File

@ -9,11 +9,11 @@ import (
func TestRegression212ApkBufferSize(t *testing.T) { func TestRegression212ApkBufferSize(t *testing.T) {
// This is a regression test for issue #212 (https://github.com/anchore/syft/issues/212) in which the apk db could // This is a regression test for issue #212 (https://github.com/anchore/syft/issues/212) in which the apk db could
// not be processed due to a scanner buffer that was too small // not be processed due to a scanner buffer that was too small
catalog, _, _, _ := catalogFixtureImage(t, "image-large-apk-data") sbom, _ := catalogFixtureImage(t, "image-large-apk-data")
expectedPkgs := 58 expectedPkgs := 58
actualPkgs := 0 actualPkgs := 0
for range catalog.Enumerate(pkg.ApkPkg) { for range sbom.Artifacts.PackageCatalog.Enumerate(pkg.ApkPkg) {
actualPkgs += 1 actualPkgs += 1
} }

View File

@ -15,11 +15,11 @@ func TestRegressionGoArchDiscovery(t *testing.T) {
) )
// This is a regression test to make sure the way we detect go binary packages // This is a regression test to make sure the way we detect go binary packages
// stays consistent and reproducible as the tool chain evolves // stays consistent and reproducible as the tool chain evolves
catalog, _, _, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage") sbom, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage")
var actualELF, actualWIN, actualMACOS int var actualELF, actualWIN, actualMACOS int
for p := range catalog.Enumerate(pkg.GoModulePkg) { for p := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.GoModulePkg) {
for _, l := range p.Locations { for _, l := range p.Locations {
switch { switch {
case strings.Contains(l.RealPath, "elf"): case strings.Contains(l.RealPath, "elf"):

View File

@ -3,16 +3,14 @@ package integration
import ( import (
"testing" "testing"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/sbom"
"github.com/anchore/stereoscope/pkg/imagetest" "github.com/anchore/stereoscope/pkg/imagetest"
"github.com/anchore/syft/syft" "github.com/anchore/syft/syft"
"github.com/anchore/syft/syft/distro"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
func catalogFixtureImage(t *testing.T, fixtureImageName string) (*pkg.Catalog, []artifact.Relationship, *distro.Distro, *source.Source) { func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *source.Source) {
imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName) imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName)
tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName)
@ -27,10 +25,17 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (*pkg.Catalog, [
t.Fatalf("failed to catalog image: %+v", err) t.Fatalf("failed to catalog image: %+v", err)
} }
return pkgCatalog, relationships, actualDistro, theSource return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkgCatalog,
Distro: actualDistro,
},
Relationships: relationships,
Source: theSource.Metadata,
}, theSource
} }
func catalogDirectory(t *testing.T, dir string) (*pkg.Catalog, []artifact.Relationship, *distro.Distro, *source.Source) { func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) {
theSource, cleanupSource, err := source.New("dir:"+dir, nil) theSource, cleanupSource, err := source.New("dir:"+dir, nil)
t.Cleanup(cleanupSource) t.Cleanup(cleanupSource)
if err != nil { if err != nil {
@ -42,5 +47,12 @@ func catalogDirectory(t *testing.T, dir string) (*pkg.Catalog, []artifact.Relati
t.Fatalf("failed to catalog image: %+v", err) t.Fatalf("failed to catalog image: %+v", err)
} }
return pkgCatalog, relationships, actualDistro, theSource return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkgCatalog,
Distro: actualDistro,
},
Relationships: relationships,
Source: theSource.Metadata,
}, theSource
} }