hoist up package relationship discovery to analysis stage

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-11-10 11:05:01 -05:00
parent b519340b86
commit cb0278f3bc
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
20 changed files with 60 additions and 61 deletions

View File

@ -1,7 +1,7 @@
{
"artifacts": [
{
"id": "14696638697550896878",
"id": "cbf4f3077fc7deee",
"name": "package-1",
"version": "1.0.1",
"type": "python",
@ -36,7 +36,7 @@
}
},
{
"id": "1889729387356865209",
"id": "1a39aadd9705c2b9",
"name": "package-2",
"version": "2.0.1",
"type": "deb",

View File

@ -1,7 +1,7 @@
{
"artifacts": [
{
"id": "15119766234833480967",
"id": "d1d433485a31ed07",
"name": "package-1",
"version": "1.0.1",
"type": "python",
@ -32,7 +32,7 @@
}
},
{
"id": "3293866126252599174",
"id": "2db629ca48fa6786",
"name": "package-2",
"version": "2.0.1",
"type": "deb",

View File

@ -25,7 +25,7 @@ func ToFormatModel(s sbom.SBOM, applicationConfig interface{}) model.Document {
return model.Document{
Artifacts: toPackageModels(s.Artifacts.PackageCatalog),
ArtifactRelationships: toRelationshipModel(pkg.NewRelationships(s.Artifacts.PackageCatalog)),
ArtifactRelationships: toRelationshipModel(s.Relationships),
Source: src,
Distro: toDistroModel(s.Artifacts.Distro),
Descriptor: model.Descriptor{

View File

@ -72,7 +72,7 @@
],
"artifacts": [
{
"id": "13280550215267739407",
"id": "b84dfe0eb2c5670f",
"name": "package-1",
"version": "1.0.1",
"type": "python",
@ -102,7 +102,7 @@
}
},
{
"id": "7356949319602771519",
"id": "6619226d6979963f",
"name": "package-2",
"version": "2.0.1",
"type": "deb",

View File

@ -13,7 +13,7 @@ type Identifiable interface {
ID() ID
}
func DeriveID(obj interface{}) (ID, error) {
func IDFromHash(obj interface{}) (ID, error) {
f, err := hashstructure.Hash(obj, hashstructure.FormatV2, &hashstructure.HashOptions{
ZeroNil: true,
SlicesAsSets: true,
@ -22,5 +22,5 @@ func DeriveID(obj interface{}) (ID, error) {
return "", fmt.Errorf("could not build ID for object=%+v: %+v", obj, err)
}
return ID(fmt.Sprint(f)), nil
return ID(fmt.Sprintf("%x", f)), nil
}

View File

@ -74,6 +74,8 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers
allRelationships = append(allRelationships, relationships...)
}
allRelationships = append(allRelationships, pkg.NewRelationships(catalog)...)
if errs != nil {
return nil, nil, errs
}

View File

@ -28,7 +28,7 @@ type Package struct {
}
func (p Package) ID() artifact.ID {
f, err := artifact.DeriveID(p)
f, err := artifact.IDFromHash(p)
if err != nil {
// TODO: what to do in this case?
log.Warnf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err)

View File

@ -189,10 +189,10 @@ func TestFingerprint(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
transformedPkg := test.transform(originalPkg)
originalFingerprint, err := originalPkg.Fingerprint()
assert.NoError(t, err, "expected no error on package fingerprint")
transformedFingerprint, err := transformedPkg.Fingerprint()
assert.NoError(t, err, "expected no error on package fingerprint")
originalFingerprint := originalPkg.ID()
assert.NotEmpty(t, originalFingerprint)
transformedFingerprint := transformedPkg.ID()
assert.NotEmpty(t, transformedFingerprint)
if test.expectIdentical {
assert.Equal(t, originalFingerprint, transformedFingerprint)

View File

@ -4,5 +4,5 @@ import "github.com/anchore/syft/syft/artifact"
// TODO: as more relationships are added, this function signature will probably accommodate selection
func NewRelationships(catalog *Catalog) []artifact.Relationship {
return ownershipByFilesRelationships(catalog)
return RelationshipsByFileOwnership(catalog)
}

View File

@ -21,7 +21,9 @@ type ownershipByFilesMetadata struct {
Files []string `json:"files"`
}
func ownershipByFilesRelationships(catalog *Catalog) []artifact.Relationship {
// RelationshipsByFileOwnership creates package-to-package relationships by discovering which packages have
// evidence locations that overlap with ownership claims from another package's package manager metadata.
func RelationshipsByFileOwnership(catalog *Catalog) []artifact.Relationship {
var relationships = findOwnershipByFilesRelationships(catalog)
var edges []artifact.Relationship

View File

@ -167,7 +167,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
pkgs, expectedRelations := test.setup(t)
c := NewCatalog(pkgs...)
relationships := ownershipByFilesRelationships(c)
relationships := RelationshipsByFileOwnership(c)
assert.Len(t, relationships, len(expectedRelations))
for idx, expectedRelationship := range expectedRelations {

View File

@ -75,7 +75,7 @@ func (l Location) String() string {
}
func (l Location) ID() artifact.ID {
f, err := artifact.DeriveID(l)
f, err := artifact.IDFromHash(l)
if err != nil {
// TODO: what to do in this case?
log.Warnf("unable to get fingerprint of location=%+v: %+v", l, err)

View File

@ -49,7 +49,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
}
func TestPkgCoverageImage(t *testing.T) {
catalog, _, _, _ := catalogFixtureImage(t, "image-pkg-coverage")
sbom, _ := catalogFixtureImage(t, "image-pkg-coverage")
observedLanguages := internal.NewStringSet()
definedLanguages := internal.NewStringSet()
@ -80,7 +80,7 @@ func TestPkgCoverageImage(t *testing.T) {
t.Run(c.name, func(t *testing.T) {
pkgCount := 0
for a := range catalog.Enumerate(c.pkgType) {
for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) {
if a.Language.String() != "" {
observedLanguages.Add(a.Language.String())
@ -108,7 +108,7 @@ func TestPkgCoverageImage(t *testing.T) {
if pkgCount != len(c.pkgInfo)+c.duplicates {
t.Logf("Discovered packages of type %+v", c.pkgType)
for a := range catalog.Enumerate(c.pkgType) {
for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) {
t.Log(" ", a)
}
t.Fatalf("unexpected package count: %d!=%d", pkgCount, len(c.pkgInfo))
@ -135,7 +135,7 @@ func TestPkgCoverageImage(t *testing.T) {
}
func TestPkgCoverageDirectory(t *testing.T) {
catalog, _, _, _ := catalogDirectory(t, "test-fixtures/image-pkg-coverage")
sbom, _ := catalogDirectory(t, "test-fixtures/image-pkg-coverage")
observedLanguages := internal.NewStringSet()
definedLanguages := internal.NewStringSet()
@ -157,7 +157,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
actualPkgCount := 0
for actualPkg := range catalog.Enumerate(test.pkgType) {
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) {
observedLanguages.Add(actualPkg.Language.String())
observedPkgs.Add(string(actualPkg.Type))
@ -182,7 +182,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
}
if actualPkgCount != len(test.pkgInfo)+test.duplicates {
for actualPkg := range catalog.Enumerate(test.pkgType) {
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) {
t.Log(" ", actualPkg)
}
t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo))

View File

@ -8,14 +8,14 @@ import (
)
func TestDistroImage(t *testing.T) {
_, _, actualDistro, _ := catalogFixtureImage(t, "image-distro-id")
sbom, _ := catalogFixtureImage(t, "image-distro-id")
expected, err := distro.NewDistro(distro.Busybox, "1.31.1", "")
if err != nil {
t.Fatalf("could not create distro: %+v", err)
}
for _, d := range deep.Equal(actualDistro, &expected) {
for _, d := range deep.Equal(sbom.Artifacts.Distro, &expected) {
t.Errorf("found distro difference: %+v", d)
}

View File

@ -8,8 +8,6 @@ import (
"github.com/sergi/go-diff/diffmatchpatch"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/format"
"github.com/stretchr/testify/assert"
)
@ -31,15 +29,7 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
for _, test := range tests {
t.Run(string(test.format), func(t *testing.T) {
originalCatalog, _, d, src := catalogFixtureImage(t, "image-pkg-coverage")
originalSBOM := sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: originalCatalog,
Distro: d,
},
Source: src.Metadata,
}
originalSBOM, _ := catalogFixtureImage(t, "image-pkg-coverage")
by1, err := syft.Encode(originalSBOM, test.format)
assert.NoError(t, err)

View File

@ -9,11 +9,11 @@ import (
)
func TestNpmPackageLockDirectory(t *testing.T) {
catalog, _, _, _ := catalogDirectory(t, "test-fixtures/npm-lock")
sbom, _ := catalogDirectory(t, "test-fixtures/npm-lock")
foundPackages := internal.NewStringSet()
for actualPkg := range catalog.Enumerate(pkg.NpmPkg) {
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations {
if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation)
@ -30,11 +30,11 @@ func TestNpmPackageLockDirectory(t *testing.T) {
}
func TestYarnPackageLockDirectory(t *testing.T) {
catalog, _, _, _ := catalogDirectory(t, "test-fixtures/yarn-lock")
sbom, _ := catalogDirectory(t, "test-fixtures/yarn-lock")
foundPackages := internal.NewStringSet()
for actualPkg := range catalog.Enumerate(pkg.NpmPkg) {
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations {
if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation)

View File

@ -7,7 +7,6 @@ import (
"github.com/anchore/syft/internal/formats/syftjson"
syftjsonModel "github.com/anchore/syft/internal/formats/syftjson/model"
"github.com/anchore/syft/syft/sbom"
)
func TestPackageOwnershipRelationships(t *testing.T) {
@ -23,15 +22,9 @@ func TestPackageOwnershipRelationships(t *testing.T) {
for _, test := range tests {
t.Run(test.fixture, func(t *testing.T) {
catalog, _, d, src := catalogFixtureImage(t, test.fixture)
sbom, _ := catalogFixtureImage(t, test.fixture)
p := syftjson.Format().Presenter(sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: catalog,
Distro: d,
},
Source: src.Metadata,
})
p := syftjson.Format().Presenter(sbom)
if p == nil {
t.Fatal("unable to get presenter")
}

View File

@ -9,11 +9,11 @@ import (
func TestRegression212ApkBufferSize(t *testing.T) {
// This is a regression test for issue #212 (https://github.com/anchore/syft/issues/212) in which the apk db could
// not be processed due to a scanner buffer that was too small
catalog, _, _, _ := catalogFixtureImage(t, "image-large-apk-data")
sbom, _ := catalogFixtureImage(t, "image-large-apk-data")
expectedPkgs := 58
actualPkgs := 0
for range catalog.Enumerate(pkg.ApkPkg) {
for range sbom.Artifacts.PackageCatalog.Enumerate(pkg.ApkPkg) {
actualPkgs += 1
}

View File

@ -15,11 +15,11 @@ func TestRegressionGoArchDiscovery(t *testing.T) {
)
// This is a regression test to make sure the way we detect go binary packages
// stays consistent and reproducible as the tool chain evolves
catalog, _, _, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage")
sbom, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage")
var actualELF, actualWIN, actualMACOS int
for p := range catalog.Enumerate(pkg.GoModulePkg) {
for p := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.GoModulePkg) {
for _, l := range p.Locations {
switch {
case strings.Contains(l.RealPath, "elf"):

View File

@ -3,16 +3,14 @@ package integration
import (
"testing"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/stereoscope/pkg/imagetest"
"github.com/anchore/syft/syft"
"github.com/anchore/syft/syft/distro"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
func catalogFixtureImage(t *testing.T, fixtureImageName string) (*pkg.Catalog, []artifact.Relationship, *distro.Distro, *source.Source) {
func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *source.Source) {
imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName)
tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName)
@ -27,10 +25,17 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (*pkg.Catalog, [
t.Fatalf("failed to catalog image: %+v", err)
}
return pkgCatalog, relationships, actualDistro, theSource
return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkgCatalog,
Distro: actualDistro,
},
Relationships: relationships,
Source: theSource.Metadata,
}, theSource
}
func catalogDirectory(t *testing.T, dir string) (*pkg.Catalog, []artifact.Relationship, *distro.Distro, *source.Source) {
func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) {
theSource, cleanupSource, err := source.New("dir:"+dir, nil)
t.Cleanup(cleanupSource)
if err != nil {
@ -42,5 +47,12 @@ func catalogDirectory(t *testing.T, dir string) (*pkg.Catalog, []artifact.Relati
t.Fatalf("failed to catalog image: %+v", err)
}
return pkgCatalog, relationships, actualDistro, theSource
return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkgCatalog,
Distro: actualDistro,
},
Relationships: relationships,
Source: theSource.Metadata,
}, theSource
}