diff --git a/cmd/syft/internal/options/format_spdx_json.go b/cmd/syft/internal/options/format_spdx_json.go index efdccb230..53e822650 100644 --- a/cmd/syft/internal/options/format_spdx_json.go +++ b/cmd/syft/internal/options/format_spdx_json.go @@ -13,12 +13,10 @@ func DefaultFormatSPDXJSON() FormatSPDXJSON { } func (o FormatSPDXJSON) config(v string) spdxjson.EncoderConfig { - var pretty bool + c := spdxjson.DefaultEncoderConfig() + c.Version = v if o.Pretty != nil { - pretty = *o.Pretty - } - return spdxjson.EncoderConfig{ - Version: v, - Pretty: pretty, + c.Pretty = *o.Pretty } + return c } diff --git a/cmd/syft/internal/options/format_spdx_json_test.go b/cmd/syft/internal/options/format_spdx_json_test.go index b81ef04c7..f7218d5f0 100644 --- a/cmd/syft/internal/options/format_spdx_json_test.go +++ b/cmd/syft/internal/options/format_spdx_json_test.go @@ -2,6 +2,8 @@ package options import ( "testing" + + "github.com/stretchr/testify/require" ) func TestFormatSPDXJSON_buildConfig(t *testing.T) { @@ -11,5 +13,7 @@ func TestFormatSPDXJSON_buildConfig(t *testing.T) { ft = setAllToNonZero(t, ft).(*FormatSPDXJSON) subject := ft.config("Version") - assertExpectedValue(t, subject) + require.Equal(t, "Version", subject.Version) + require.Equal(t, "2.3", subject.DefaultVersion) + require.True(t, subject.Pretty) } diff --git a/go.mod b/go.mod index e339ffe80..563220d46 100644 --- a/go.mod +++ b/go.mod @@ -79,7 +79,7 @@ require ( github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e github.com/sergi/go-diff v1.4.0 github.com/spdx/gordf v0.0.0-20201111095634-7098f93598fb - github.com/spdx/tools-golang v0.5.7 + github.com/spdx/tools-golang v0.6.0-rc4 github.com/spf13/afero v1.15.0 github.com/spf13/cobra v1.10.2 github.com/stretchr/testify v1.11.1 @@ -127,7 +127,7 @@ require ( github.com/STARRY-S/zip v0.2.3 // indirect github.com/agext/levenshtein v1.2.1 // indirect github.com/anchore/go-lzo v0.1.0 // indirect - github.com/anchore/go-struct-converter v0.1.0 // 
indirect + github.com/anchore/go-struct-converter v0.2.0-rc2 // indirect github.com/andybalholm/brotli v1.2.0 // indirect github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect github.com/aquasecurity/go-version v0.0.1 // indirect @@ -258,12 +258,14 @@ require ( github.com/pborman/indent v1.2.1 // indirect github.com/pelletier/go-toml/v2 v2.3.1 // indirect github.com/pierrec/lz4/v4 v4.1.26 // indirect + github.com/piprate/json-gold v0.7.0 // indirect github.com/pjbgf/sha1cd v0.6.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pkg/profile v1.7.0 // indirect github.com/pkg/xattr v0.4.12 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/pquerna/cachecontrol v0.0.0-20180517163645-1555304b9b35 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect diff --git a/go.sum b/go.sum index 21eec0037..031a56fc3 100644 --- a/go.sum +++ b/go.sum @@ -138,8 +138,8 @@ github.com/anchore/go-macholibre v0.1.0 h1:qHbdusBZNcZM/uuKf1Psa9xxAFSoyRTps8GW9 github.com/anchore/go-macholibre v0.1.0/go.mod h1:eu0gbwaZ+ocVFJLePdmPPDKU8MboV1MKsUCr36Ckd5s= github.com/anchore/go-rpmdb v0.1.0 h1:Q8dc208/HYzCqhx0L1zurfm1UPil24hlo9NjkdFmLdE= github.com/anchore/go-rpmdb v0.1.0/go.mod h1:eQVa6QFGzKy0qMcnW2pez0XBczvgwSjw9vA23qifEyU= -github.com/anchore/go-struct-converter v0.1.0 h1:2rDRssAl6mgKBSLNiVCMADgZRhoqtw9dedlWa0OhD30= -github.com/anchore/go-struct-converter v0.1.0/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA= +github.com/anchore/go-struct-converter v0.2.0-rc2 h1:q+859fW2/jbHJHB2etbNfRlFwYpknyvbqqk1hUdamQ4= +github.com/anchore/go-struct-converter v0.2.0-rc2/go.mod h1:cDBA5vhcR62nXWo8QH9/Kk2807o65ISaHPNPX66L+Uw= github.com/anchore/go-sync v0.1.0 h1:1TEZM7jISrvtoBMOF79xP0caQKASAtgW1yKqc0EjyZg= 
github.com/anchore/go-sync v0.1.0/go.mod h1:Iposeub0kHipoTei1icj4Tys0SJN+cCdxEnkS7bZUFs= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods= @@ -789,6 +789,8 @@ github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7ol github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pierrec/lz4/v4 v4.1.26 h1:GrpZw1gZttORinvzBdXPUXATeqlJjqUG/D87TKMnhjY= github.com/pierrec/lz4/v4 v4.1.26/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4= +github.com/piprate/json-gold v0.7.0 h1:bEMirgA5y8Z2loTQfxyIFfY+EflxH1CTP6r/KIlcJNw= +github.com/piprate/json-gold v0.7.0/go.mod h1:RVhE35veDX19r5gfUAR+IYHkAUuPwJO8Ie/qVeFaIzw= github.com/pjbgf/sha1cd v0.6.0 h1:3WJ8Wz8gvDz29quX1OcEmkAlUg9diU4GxJHqs0/XiwU= github.com/pjbgf/sha1cd v0.6.0/go.mod h1:lhpGlyHLpQZoxMv8HcgXvZEhcGs0PG/vsZnEJ7H0iCM= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= @@ -809,6 +811,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= +github.com/pquerna/cachecontrol v0.0.0-20180517163645-1555304b9b35 h1:J9b7z+QKAmPf4YLrFg6oQUotqHQeUNWwkvo7jZp1GLU= +github.com/pquerna/cachecontrol v0.0.0-20180517163645-1555304b9b35/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= @@ -877,8 +881,8 @@ 
github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cw github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spdx/gordf v0.0.0-20201111095634-7098f93598fb h1:bLo8hvc8XFm9J47r690TUKBzcjSWdJDxmjXJZ+/f92U= github.com/spdx/gordf v0.0.0-20201111095634-7098f93598fb/go.mod h1:uKWaldnbMnjsSAXRurWqqrdyZen1R7kxl8TkmWk2OyM= -github.com/spdx/tools-golang v0.5.7 h1:+sWcKGnhwp3vLdMqPcLdA6QK679vd86cK9hQWH3AwCg= -github.com/spdx/tools-golang v0.5.7/go.mod h1:jg7w0LOpoNAw6OxKEzCoqPC2GCTj45LyTlVmXubDsYw= +github.com/spdx/tools-golang v0.6.0-rc4 h1:2GkvNr0DcnJHY9BDm3OYHo229jZS/h4qYDK+tHYXPOo= +github.com/spdx/tools-golang v0.6.0-rc4/go.mod h1:ruCHu3shgy7bVbZ7gtEU4Gq4fI08n2SdXtgV5PoN/OM= github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= diff --git a/syft/format/common/spdxhelpers/to_syft_model.go b/syft/format/common/spdxhelpers/to_syft_model.go index 80bbbf964..c375114bb 100644 --- a/syft/format/common/spdxhelpers/to_syft_model.go +++ b/syft/format/common/spdxhelpers/to_syft_model.go @@ -399,7 +399,7 @@ func collectDocRelationships(spdxIDMap map[string]any, doc *spdx.Document) (out to = toLocation case helpers.OtherRelationship: // Encoding uses a specifically formatted comment... - if strings.Index(r.RelationshipComment, string(artifact.EvidentByRelationship)) == 0 { + if strings.HasPrefix(r.RelationshipComment, string(artifact.EvidentByRelationship)) { typ = artifact.EvidentByRelationship to = toLocation } @@ -418,7 +418,7 @@ func collectDocRelationships(spdxIDMap map[string]any, doc *spdx.Document) (out to = toPackage case helpers.OtherRelationship: // Encoding uses a specifically formatted comment... 
// requireAndTrimPrefix returns val with prefix removed when val is a string
// that starts with prefix. For every other input (a non-string value, or a
// string that does not start with prefix) it returns the empty string.
func requireAndTrimPrefix(val any, prefix string) string {
	s, isString := val.(string)
	if !isString || !strings.HasPrefix(s, prefix) {
		return ""
	}
	// the prefix is known to be present, so slicing it off is equivalent to trimming it
	return s[len(prefix):]
}
"github.com/anchore/syft/syft/license" + "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" +) + +func ToSyftModelV3(doc *spdx.Document) (*sbom.SBOM, error) { + if doc == nil { + return nil, errors.New("cannot convert SPDX document to Syft model because document is nil") + } + + spdxMap := ptrMap[any]{} + + s := &sbom.SBOM{ + Source: v3extractSource(spdxMap, doc), + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(), + FileMetadata: map[file.Coordinates]file.Metadata{}, + FileDigests: map[file.Coordinates][]file.Digest{}, + LinuxDistribution: v3findLinuxReleaseByPURL(doc), + }, + } + + relationships := v3relationshipMap(doc) + + v3collectSyftPackages(s, spdxMap, relationships, doc) + + v3collectSyftFiles(s, spdxMap, doc) + + s.Relationships = v3toSyftRelationships(spdxMap, doc) + + return s, nil +} + +func v3removePackage(packages spdx.ElementList, remove spdx.AnyPackage) (pkgs spdx.ElementList) { + for _, p := range packages { + if p == remove { + continue + } + pkgs = append(pkgs, p) + } + return pkgs +} + +func v3removeRelationships(elements spdx.ElementList, element spdx.AnyElement) (relations spdx.ElementList) { + for _, e := range elements { + if r, ok := e.(spdx.AnyRelationship); ok { + if r != nil && r.GetFrom() == element { + continue + } + + if r != nil { + var tos spdx.ElementList + for _, to := range r.GetTo() { + if to == element { + continue + } + tos = append(tos, to) + } + r.SetTo(tos) + if len(r.GetTo()) == 0 { + continue + } + } + + relations = append(relations, r) + } else { + relations = append(relations, e) + } + } + return relations +} + +func v3findRootPackages(doc spdx.AnyElementCollection) (out spdx.PackageList) { + for _, p := range doc.GetRootElements().Packages() { + out = append(out, p) + } + for _, s := range doc.GetRootElements().SBOMs() { + for _, p := range s.GetRootElements().Packages() { + out = append(out, p) + } + } + 
return +} + +func v3extractSource(spdxMap ptrMap[any], doc *spdx.Document) source.Description { + namespace := doc.ID + if namespace == "" && len(doc.NamespaceMaps) > 0 { + namespace = string(doc.NamespaceMaps[0].GetNamespace()) + } + src := extractSourceFromNamespace(namespace) + + rootPackages := v3findRootPackages(doc) + + if len(rootPackages) != 1 { + return src + } + + p := rootPackages[0] + + switch p.GetPrimaryPurpose() { + case spdx.SoftwarePurpose_Container: + src = v3containerSource(p) + case spdx.SoftwarePurpose_File: + src = v3fileSource(p) + default: + return src + } + + spdxMap.Set(p, src) + + doc.Elements = v3removePackage(doc.Elements, p) + doc.Elements = v3removeRelationships(doc.Elements, p) + + return src +} + +func v3containerSource(p spdx.AnyPackage) source.Description { + container := p.GetName() + v := p.GetVersion() + if v != "" { + container += ":" + v + } + + digest := "" + if len(p.GetVerifiedUsing().Hashes()) > 0 { + h := p.GetVerifiedUsing().Hashes()[0] + if h != nil { + digest = fmt.Sprintf("%s:%s", v3fromChecksumAlgorithm(h.GetAlgorithm()), h.GetValue()) + } + } + + supplier := "" + if p.GetSuppliedBy() != nil { + supplier = v3agentString(p.GetSuppliedBy()) + } + + return source.Description{ + ID: p.GetID(), + Name: p.GetName(), + Version: p.GetVersion(), + Supplier: supplier, + Metadata: source.ImageMetadata{ + UserInput: container, + ID: p.GetID(), + Layers: nil, // TODO handle formats with nested layer packages like Tern and K8s BOM tool + ManifestDigest: digest, + }, + } +} + +func v3fileSource(p spdx.AnyPackage) source.Description { + typeRegex := regexp.MustCompile("DocumentRoot-([^-]+)-.*$") + typeName := typeRegex.ReplaceAllString(p.GetID(), "$1") + + var version string + var metadata any + switch { + case typeName == prefixDirectory: + // is a Syft SBOM, explicitly a directory source + metadata, version = v3directorySourceMetadata(p) + case typeName == prefixFile: + // is a Syft SBOM, explicitly a file source + metadata, 
version = v3fileSourceMetadata(p) + case isDirectory(p.GetName()): + // is a non-Syft SBOM, which looks like a directory + metadata, version = v3directorySourceMetadata(p) + default: + // is a non-Syft SBOM, which is probably a file + metadata, version = v3fileSourceMetadata(p) + } + + supplier := "" + if p.GetSuppliedBy() != nil { + supplier = p.GetSuppliedBy().GetName() + } + + return source.Description{ + ID: p.GetID(), + Name: p.GetName(), + Version: version, + Supplier: supplier, + Metadata: metadata, + } +} + +func v3fileSourceMetadata(p spdx.AnyPackage) (any, string) { + version := p.GetVersion() + + m := source.FileMetadata{ + Path: p.GetName(), + } + // if this is a Syft SBOM, we might have output a digest as the version + checksum := v3toChecksum(p.GetVersion()) + for _, d := range p.GetVerifiedUsing().Hashes() { + if checksum != nil && checksum.GetValue() == d.GetValue() { + version = "" + } + m.Digests = append(m.Digests, file.Digest{ + Algorithm: v3fromChecksumAlgorithm(d.GetAlgorithm()), + Value: d.GetValue(), + }) + } + + return m, version +} + +func v3directorySourceMetadata(p spdx.AnyPackage) (any, string) { + return source.DirectoryMetadata{ + Path: p.GetName(), + Base: "", + }, p.GetVersion() +} + +func v3findLinuxReleaseByPURL(doc *spdx.Document) *linux.Release { + for _, p := range doc.Elements.Packages() { + purlValue := v3findPURLValue(p) + if purlValue == "" { + continue + } + purl, err := packageurl.FromString(purlValue) + if err != nil { + log.Warnf("unable to parse purl: %s", purlValue) + continue + } + distro := v3findQualifierValue(purl, pkg.PURLQualifierDistro) + if distro != "" { + parts := strings.Split(distro, "-") + name := parts[0] + version := "" + if len(parts) > 1 { + version = parts[1] + } + return &linux.Release{ + PrettyName: name, + Name: name, + ID: name, + IDLike: []string{name}, + Version: version, + VersionID: version, + } + } + } + + return nil +} + +func v3collectSyftPackages(s *sbom.SBOM, spdxMap ptrMap[any], 
relationships ptrMap[[]spdx.AnyRelationship], doc *spdx.Document) { + skipIDs := v3packageIDsToSkip(doc) + found := ptrMap[struct{}]{} + // tools-golang collects all elements from the JSON LD @graph to the root SpdxDocument.Elements property + for _, elementList := range []spdx.ElementList{doc.Elements, doc.RootElements} { + for _, p := range elementList.Packages() { + if p == nil || skipIDs.Has(p) || found.Has(p) { + continue + } + found.Set(p, struct{}{}) + syftPkg := v3toSyftPackage(relationships, p) + spdxMap.Set(p, syftPkg) + s.Artifacts.Packages.Add(syftPkg) + } + } +} + +func v3collectSyftFiles(s *sbom.SBOM, spdxMap ptrMap[any], doc *spdx.Document) { + found := ptrMap[struct{}]{} + for _, elementList := range []spdx.ElementList{doc.Elements, doc.RootElements} { + for _, f := range elementList.Files() { + if found.Has(f) { + continue + } + found.Set(f, struct{}{}) + l := v3toSyftLocation(f) + spdxMap.Set(f, l) + + s.Artifacts.FileMetadata[l.Coordinates] = v3toFileMetadata(f) + s.Artifacts.FileDigests[l.Coordinates] = v3toFileDigests(f) + } + } +} + +func v3toFileDigests(f spdx.AnyFile) (digests []file.Digest) { + for _, h := range f.GetVerifiedUsing().Hashes() { + digests = append(digests, file.Digest{ + Algorithm: v3fromChecksumAlgorithm(h.GetAlgorithm()), + Value: h.GetValue(), + }) + } + return digests +} + +func v3fromChecksumAlgorithm(algorithm spdx.HashAlgorithm) string { + // it might be better to have a specific case statement with constants + parts := strings.Split(algorithm.GetID(), "/") + return strings.ToLower(parts[len(parts)-1]) +} + +func v3toFileMetadata(f spdx.AnyFile) (meta file.Metadata) { + // FIXME Syft is currently lossy due to the SPDX 2.2.1 spec not supporting arbitrary mimetypes + if f.GetContentType() != "" { + meta.MIMEType = f.GetContentType() + } + return meta +} + +func v3toSyftRelationships(spdxMap ptrMap[any], doc *spdx.Document) []artifact.Relationship { + out := v3collectDocRelationships(spdxMap, doc) + + return out +} + 
+//nolint:gocognit +func v3collectDocRelationships(spdxMap ptrMap[any], doc *spdx.Document) (out []artifact.Relationship) { + for _, r := range doc.Elements.Relationships() { + from := r.GetFrom() + if from == nil || from.GetID() == "" { + log.Debugf("ignoring relationship to external document: %+v", r) + continue + } + a := spdxMap.Get(from) + + for _, to := range r.GetTo() { + b := spdxMap.Get(to) + from, fromOk := a.(pkg.Package) + toPackage, toPackageOk := b.(pkg.Package) + toLocation, toLocationOk := b.(file.Location) + //nolint:staticcheck + if !fromOk || !(toPackageOk || toLocationOk) { + log.Debugf("unable to find valid relationship mapping from SPDX, ignoring: (from: %+v) (to: %+v)", a, b) + continue + } + var to artifact.Identifiable + var typ artifact.RelationshipType + if toLocationOk { + switch r.GetType() { + case spdx.RelationshipType_Contains: + typ = artifact.ContainsRelationship + to = toLocation + case spdx.RelationshipType_Other: + // Encoding uses a specifically formatted comment... + if strings.HasPrefix(r.GetComment(), string(artifact.EvidentByRelationship)) { + typ = artifact.EvidentByRelationship + to = toLocation + } + } + } else { + switch r.GetType() { + case spdx.RelationshipType_DependsOn: + typ = artifact.DependencyOfRelationship + to = from + from = toPackage + case spdx.RelationshipType_Contains: + typ = artifact.ContainsRelationship + to = toPackage + case spdx.RelationshipType_Other: + // Encoding uses a specifically formatted comment... 
+ if strings.HasPrefix(r.GetComment(), string(artifact.OwnershipByFileOverlapRelationship)) { + typ = artifact.OwnershipByFileOverlapRelationship + to = toPackage + } + } + } + if typ != "" && to != nil { + out = append(out, artifact.Relationship{ + From: from, + To: to, + Type: typ, + }) + } + } + } + return out +} + +func v3toSyftCoordinates(f spdx.AnyFile) file.Coordinates { + const layerIDPrefix = "layerID: " + var fileSystemID string + if strings.HasPrefix(f.GetComment(), layerIDPrefix) { + fileSystemID = strings.TrimPrefix(f.GetComment(), layerIDPrefix) + } + if strings.HasPrefix(f.GetID(), layerIDPrefix) { + fileSystemID = strings.TrimPrefix(f.GetID(), layerIDPrefix) + } + return file.Coordinates{ + RealPath: f.GetName(), + FileSystemID: fileSystemID, + } +} + +func v3toSyftLocation(f spdx.AnyFile) file.Location { + l := file.NewVirtualLocationFromCoordinates(v3toSyftCoordinates(f), f.GetName()) + return l +} + +func v3findQualifierValue(purl packageurl.PackageURL, qualifier string) string { + for _, q := range purl.Qualifiers { + if q.Key == qualifier { + return q.Value + } + } + return "" +} + +func v3extractPkgInfo(p spdx.AnyPackage) pkgInfo { + pu := v3findPURLValue(p) + purl, err := packageurl.FromString(pu) + if err != nil { + return pkgInfo{} + } + return pkgInfo{ + purl, + pkg.TypeByName(purl.Type), + pkg.LanguageByName(purl.Type), + } +} + +func v3toSyftPackage(relationships ptrMap[[]spdx.AnyRelationship], p spdx.AnyPackage) pkg.Package { + info := v3extractPkgInfo(p) + sP := &pkg.Package{ + Type: info.typ, + Name: p.GetName(), + Version: p.GetVersion(), + Licenses: pkg.NewLicenseSet(v3parseSPDXLicenses(relationships, p)...), + CPEs: v3extractCPEs(p), + PURL: v3purlValue(info.purl), + Language: info.lang, + Metadata: v3extractMetadata(p, info), + } + + internal.Backfill(sP) + + if p.GetID() != "" { + // always prefer the IDs from the SBOM over derived IDs + sP.OverrideID(artifact.ID(p.GetID())) + } else { + sP.SetID() + } + + return *sP +} + +func 
v3purlValue(purl packageurl.PackageURL) string { + val := purl.String() + if _, err := packageurl.FromString(val); err != nil { + return "" + } + return val +} + +func v3parseSPDXLicenses(relationships ptrMap[[]spdx.AnyRelationship], p spdx.AnyPackage) []pkg.License { + licenses := make([]pkg.License, 0) + + // licenses are defined with relationships in SPDX 3, see: + // https://github.com/spdx/tools-golang/blob/spdx3/spdx/v3/v3_0/convert.go#L536 + rels := relationships.Get(p) + for _, r := range rels { + if r.GetType() == spdx.RelationshipType_HasConcludedLicense { + licenses = append(licenses, v3toSyftLicenses(license.Concluded, r.GetTo().LicenseInfos()...)...) + } + if r.GetType() == spdx.RelationshipType_HasDeclaredLicense { + licenses = append(licenses, v3toSyftLicenses(license.Declared, r.GetTo().LicenseInfos()...)...) + } + } + + return licenses +} + +func v3toSyftLicenses(licenseType license.Type, licenses ...spdx.AnyLicenseInfo) []pkg.License { + var out []pkg.License + for _, lic := range licenses { + if lic == nil { + continue + } + value := v3licenseInfoToExpression(lic) + if value == "" { + log.Debugf("skipping SPDX license during import: %#v", lic) + continue + } + l := pkg.NewLicense(value) + if l.Value != "" { + l.Type = licenseType + out = append(out, l) + } + } + return out +} + +// v3licenseInfoToExpression recursively converts an SPDX 3.0 license info object to an SPDX expression string. 
+func v3licenseInfoToExpression(info spdx.AnyLicenseInfo) string { + switch li := info.(type) { + case spdx.AnyLicenseExpression: + return li.GetLicenseExpression() + case spdx.AnyOrLaterOperator: + subject := v3licenseInfoToExpression(li.GetSubjectLicense()) + if subject != "" && !strings.HasSuffix(subject, "+") { + return subject + "+" + } + case spdx.AnyWithAdditionOperator: + subject := v3licenseInfoToExpression(li.GetSubjectExtendableLicense()) + addition := li.GetSubjectAddition() + if subject != "" && addition != nil { + return subject + " WITH " + addition.GetAdditionText() + } + case spdx.AnyConjunctiveLicenseSet: + var parts []string + for _, m := range li.GetMembers() { + if e := v3licenseInfoToExpression(m); e != "" { + parts = append(parts, e) + } + } + if len(parts) > 0 { + return strings.Join(parts, " AND ") + } + case spdx.AnyDisjunctiveLicenseSet: + var parts []string + for _, m := range li.GetMembers() { + if e := v3licenseInfoToExpression(m); e != "" { + parts = append(parts, e) + } + } + if len(parts) > 0 { + return "(" + strings.Join(parts, " OR ") + ")" + } + case spdx.AnyListedLicense: + return li.GetName() + case spdx.AnyCustomLicense: + if li.GetID() != "" { + return li.GetID() + } + return li.GetName() + } + return "" +} + +//nolint:funlen +func v3extractMetadata(p spdx.AnyPackage, info pkgInfo) any { + arch := info.qualifierValue(pkg.PURLQualifierArch) + upstreamValue := info.qualifierValue(pkg.PURLQualifierUpstream) + upstream := strings.SplitN(upstreamValue, "@", 2) + upstreamName := upstream[0] + upstreamVersion := "" + if len(upstream) > 1 { + upstreamVersion = upstream[1] + } + supplier := "" + if p.GetSuppliedBy() != nil { + supplier = v3agentString(p.GetSuppliedBy()) + } + originator := "" + if len(p.GetOriginatedBy()) > 0 { + // FIXME there could be multiple + originator = v3agentString(p.GetOriginatedBy()[0]) + } + switch info.typ { + case pkg.ApkPkg: + return pkg.ApkDBEntry{ + Package: p.GetName(), + OriginPackage: upstreamName, 
+ Maintainer: supplier, + Version: p.GetVersion(), + Architecture: arch, + URL: string(p.GetHomePage()), + Description: p.GetDescription(), + } + case pkg.RpmPkg: + converted, err := strconv.Atoi(info.qualifierValue(pkg.PURLQualifierEpoch)) + var epoch *int + if err != nil { + epoch = nil + } else { + epoch = &converted + } + return pkg.RpmDBEntry{ + Name: p.GetName(), + Version: p.GetVersion(), + Epoch: epoch, + Arch: arch, + SourceRpm: upstreamValue, + Vendor: originator, + } + case pkg.DebPkg: + return pkg.DpkgDBEntry{ + Package: p.GetName(), + Source: upstreamName, + Version: p.GetVersion(), + SourceVersion: upstreamVersion, + Architecture: arch, + Maintainer: originator, + } + case pkg.JavaPkg: + var digests []file.Digest + for _, value := range p.GetVerifiedUsing() { + h, _ := value.(spdx.AnyHash) + if h != nil { + digests = append(digests, file.Digest{Algorithm: v3fromChecksumAlgorithm(h.GetAlgorithm()), Value: h.GetValue()}) + } + } + return pkg.JavaArchive{ + ArchiveDigests: digests, + } + case pkg.GoModulePkg: + var h1Digest string + for _, value := range p.GetVerifiedUsing() { + h, _ := value.(spdx.AnyHash) + if h == nil { + continue + } + digest, err := helpers.HDigestFromSHA(v3fromChecksumAlgorithm(h.GetAlgorithm()), h.GetValue()) + if err != nil { + log.Debugf("invalid h1digest: %v %v", value, err) + continue + } + h1Digest = digest + break + } + return pkg.GolangBinaryBuildinfoEntry{ + H1Digest: h1Digest, + } + } + return nil +} + +func v3agentString(agent spdx.AnyAgent) string { + out := "" + switch o := agent.(type) { + case spdx.AnyOrganization: + out = o.GetName() + case spdx.AnyPerson: + out = o.GetName() + } + if out == helpers.NOASSERTION { + return "" + } + return out +} + +func v3findPURLValue(p spdx.AnyPackage) string { + if p.GetPackageURL() != "" { + return string(p.GetPackageURL()) + } + for _, r := range p.GetExternalIdentifiers() { + if r.GetType() == spdx.ExternalIdentifierType_PackageURL { + for _, l := range 
r.GetIdentifierLocators() { + // FIXME multiple values + return string(l) + } + } + } + return "" +} + +func v3extractCPEs(p spdx.AnyPackage) (cpes []cpe.CPE) { + for _, r := range p.GetExternalIdentifiers() { + if r.GetType() == spdx.ExternalIdentifierType_Cpe23 || r.GetType() == spdx.ExternalIdentifierType_Cpe22 { + c, err := cpe.New(r.GetIdentifier(), cpe.DeclaredSource) + if err == nil { + cpes = append(cpes, c) + continue + } + log.Tracef("unable to extract SPDX IDENTIFIER CPE=%q: %+v", r.GetIdentifier(), err) + // try the locators + for _, l := range r.GetIdentifierLocators() { + c, err = cpe.New(string(l), cpe.DeclaredSource) + if err != nil { + log.Tracef("unable to extract SPDX CPE=%q: %+v", l, err) + continue + } + cpes = append(cpes, c) + } + } + } + return cpes +} + +// v3packageIDsToSkip returns a set of packageIDs that should not be imported +func v3packageIDsToSkip(doc *spdx.Document) ptrMap[struct{}] { + skipIDs := ptrMap[struct{}]{} + for _, r := range doc.Elements.Relationships() { + if r != nil && r.GetFrom() != nil && r.GetType() == spdx.RelationshipType_Generates { + skipIDs.Set(r.GetFrom(), struct{}{}) // flipped from GENERATED_FROM + } + } + return skipIDs +} + +// toChecksum takes a checksum in the format : and returns an spdx.Checksum or nil if the string is invalid +func v3toChecksum(algorithmHash string) spdx.AnyHash { + parts := strings.Split(algorithmHash, ":") + if len(parts) < 2 { + return nil + } + return &spdx.Hash{ + Algorithm: v3toChecksumAlgorithm(parts[0]), + Value: parts[1], + } +} + +func v3toChecksumAlgorithm(algorithm string) spdx.HashAlgorithm { + // this needs to be an uppercase version of our algorithm + switch strings.ToLower(algorithm) { + case "sha1": + return spdx.HashAlgorithm_Sha1 + case "sha256": + return spdx.HashAlgorithm_Sha256 + case "sha384": + return spdx.HashAlgorithm_Sha384 + case "sha512": + return spdx.HashAlgorithm_Sha512 + case "md5": + return spdx.HashAlgorithm_Md5 + } + return spdx.HashAlgorithm{} +} 
+ +func v3relationshipMap(doc *spdx.Document) ptrMap[[]spdx.AnyRelationship] { + relationships := ptrMap[[]spdx.AnyRelationship]{} + for _, r := range doc.Elements.Relationships() { + existing := relationships.Get(r.GetFrom()) + relationships.Set(r.GetFrom(), append(existing, r)) + } + return relationships +} + +// SPDX 3 values are stored as pointers, and there is a distinct possibility that an ID +// will be blank if it was a JSON-LD blank node in the document, as these IDs are not persisted +// nor should they be persisted from decoding + +type ptrMap[T any] map[reflect.Value]T + +func (s ptrMap[T]) Set(k any, v T) { + s[ptrTo(k)] = v +} + +func (s ptrMap[T]) Get(k any) T { + return s[ptrTo(k)] +} + +func (s ptrMap[T]) Remove(k any) { + delete(s, ptrTo(k)) +} + +func (s ptrMap[T]) Has(k any) bool { + _, ok := s[ptrTo(k)] + return ok +} + +func ptrTo(k any) reflect.Value { + rv := reflect.ValueOf(k) + if rv.Kind() != reflect.Pointer { + // this case is a programming problem -- all objects in the SPDX 3 model are pointers and may not have IDs set + panic(fmt.Errorf("value is not a pointer; comparable SPDX 3 elements are pointers, this is probably an implementation issue: %#v", k)) + } + return rv +} diff --git a/syft/format/common/spdxhelpers/to_syft_model_v3_test.go b/syft/format/common/spdxhelpers/to_syft_model_v3_test.go new file mode 100644 index 000000000..f00399b21 --- /dev/null +++ b/syft/format/common/spdxhelpers/to_syft_model_v3_test.go @@ -0,0 +1,436 @@ +package spdxhelpers + +import ( + "testing" + + spdx "github.com/spdx/tools-golang/spdx/v3/v3_0" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/license" + "github.com/anchore/syft/syft/pkg" +) + +func TestToSyftModelV3_documentConversion(t *testing.T) { + // Build SPDX 3.0 elements + pkg1 := &spdx.Package{ + ID: "pkg-1-id", + Name: "pkg-1", + Version: "1.0.0", 
+ ExternalIdentifiers: spdx.ExternalIdentifierList{ + &spdx.ExternalIdentifier{ + Type: spdx.ExternalIdentifierType_PackageURL, + Identifier: "pkg:npm/pkg-1@1.0.0", + IdentifierLocators: []spdx.URI{"pkg:npm/pkg-1@1.0.0"}, + }, + }, + } + pkg2 := &spdx.Package{ + ID: "pkg-2-id", + Name: "pkg-2", + Version: "2.0.0", + ExternalIdentifiers: spdx.ExternalIdentifierList{ + &spdx.ExternalIdentifier{ + Type: spdx.ExternalIdentifierType_PackageURL, + Identifier: "pkg:npm/pkg-2@2.0.0", + IdentifierLocators: []spdx.URI{"pkg:npm/pkg-2@2.0.0"}, + }, + }, + } + + file1 := &spdx.File{ + ID: "file-1-id", + Name: "/src/main.go", + VerifiedUsing: spdx.IntegrityMethodList{ + &spdx.Hash{ + Algorithm: spdx.HashAlgorithm_Sha256, + Value: "abc123", + }, + }, + } + file2 := &spdx.File{ + ID: "file-2-id", + Name: "/src/util.go", + } + + // licenses + pkg1ConcludedLicense := &spdx.ListedLicense{Name: "MIT"} + pkg1DeclaredLicense := &spdx.DisjunctiveLicenseSet{ + Members: spdx.LicenseInfoList{ + &spdx.ListedLicense{Name: "MIT"}, + &spdx.ListedLicense{Name: "Apache-2.0"}, + }, + } + pkg2ConcludedLicense := &spdx.ConjunctiveLicenseSet{ + Members: spdx.LicenseInfoList{ + &spdx.ListedLicense{Name: "BSD-3-Clause"}, + &spdx.OrLaterOperator{ + SubjectLicense: &spdx.ListedLicense{Name: "GPL-2.0"}, + }, + }, + } + + // relationships + pkg1DependsOnPkg2 := &spdx.Relationship{ + From: pkg1, + To: spdx.ElementList{pkg2}, + Type: spdx.RelationshipType_DependsOn, + } + pkg1ContainsFile1 := &spdx.Relationship{ + From: pkg1, + To: spdx.ElementList{file1}, + Type: spdx.RelationshipType_Contains, + } + pkg2ContainsFile2 := &spdx.Relationship{ + From: pkg2, + To: spdx.ElementList{file2}, + Type: spdx.RelationshipType_Contains, + } + pkg1HasConcludedLicense := &spdx.Relationship{ + From: pkg1, + To: spdx.ElementList{pkg1ConcludedLicense}, + Type: spdx.RelationshipType_HasConcludedLicense, + } + pkg1HasDeclaredLicense := &spdx.Relationship{ + From: pkg1, + To: spdx.ElementList{pkg1DeclaredLicense}, + Type: 
spdx.RelationshipType_HasDeclaredLicense, + } + pkg2HasConcludedLicense := &spdx.Relationship{ + From: pkg2, + To: spdx.ElementList{pkg2ConcludedLicense}, + Type: spdx.RelationshipType_HasConcludedLicense, + } + + sbomElement := &spdx.SBOM{ + RootElements: spdx.ElementList{pkg1}, + Elements: spdx.ElementList{ + pkg1, pkg2, + file1, file2, + pkg1DependsOnPkg2, + pkg1ContainsFile1, + pkg2ContainsFile2, + pkg1HasConcludedLicense, + pkg1HasDeclaredLicense, + pkg2HasConcludedLicense, + }, + } + + doc := &spdx.Document{ + SpdxDocument: spdx.SpdxDocument{ + ID: "https://example.org/test-doc", + Name: "test-document", + RootElements: spdx.ElementList{sbomElement}, + Elements: spdx.ElementList{ + sbomElement, + pkg1, pkg2, + file1, file2, + pkg1DependsOnPkg2, + pkg1ContainsFile1, + pkg2ContainsFile2, + pkg1HasConcludedLicense, + pkg1HasDeclaredLicense, + pkg2HasConcludedLicense, + }, + }, + } + + // Convert + result, err := ToSyftModelV3(doc) + require.NoError(t, err) + require.NotNil(t, result) + + // Verify packages + allPkgs := result.Artifacts.Packages.Sorted() + require.Len(t, allPkgs, 2, "expected 2 packages") + + pkgsByName := map[string]pkg.Package{} + for _, p := range allPkgs { + pkgsByName[p.Name] = p + } + + syftPkg1, ok := pkgsByName["pkg-1"] + require.True(t, ok, "pkg-1 not found") + assert.Equal(t, "1.0.0", syftPkg1.Version) + assert.Equal(t, "pkg:npm/pkg-1@1.0.0", syftPkg1.PURL) + + syftPkg2, ok := pkgsByName["pkg-2"] + require.True(t, ok, "pkg-2 not found") + assert.Equal(t, "2.0.0", syftPkg2.Version) + assert.Equal(t, "pkg:npm/pkg-2@2.0.0", syftPkg2.PURL) + + // Verify pkg-1 licenses: concluded MIT + declared (MIT OR Apache-2.0) + pkg1Licenses := syftPkg1.Licenses.ToSlice() + require.NotEmpty(t, pkg1Licenses, "pkg-1 should have licenses") + + var pkg1Concluded, pkg1Declared []pkg.License + for _, l := range pkg1Licenses { + switch l.Type { + case license.Concluded: + pkg1Concluded = append(pkg1Concluded, l) + case license.Declared: + pkg1Declared = 
append(pkg1Declared, l) + } + } + require.Len(t, pkg1Concluded, 1) + assert.Equal(t, "MIT", pkg1Concluded[0].Value) + require.Len(t, pkg1Declared, 1) + assert.Equal(t, "(MIT OR Apache-2.0)", pkg1Declared[0].Value) + + // Verify pkg-2 licenses: concluded BSD-3-Clause AND GPL-2.0-only+ + pkg2Licenses := syftPkg2.Licenses.ToSlice() + require.NotEmpty(t, pkg2Licenses, "pkg-2 should have licenses") + + var pkg2Concluded []pkg.License + for _, l := range pkg2Licenses { + if l.Type == license.Concluded { + pkg2Concluded = append(pkg2Concluded, l) + } + } + require.Len(t, pkg2Concluded, 1) + assert.Equal(t, "BSD-3-Clause AND GPL-2.0+", pkg2Concluded[0].Value) + + // Verify files + coords1 := file.Coordinates{RealPath: "/src/main.go"} + digests, ok := result.Artifacts.FileDigests[coords1] + require.True(t, ok, "file1 digests not found") + require.Len(t, digests, 1) + assert.Equal(t, "sha256", digests[0].Algorithm) + assert.Equal(t, "abc123", digests[0].Value) + + coords2 := file.Coordinates{RealPath: "/src/util.go"} + _, ok = result.Artifacts.FileMetadata[coords2] + assert.True(t, ok, "file2 metadata not found") + + // Verify relationships + require.NotEmpty(t, result.Relationships) + + foundDependsOn := false + foundPkg1ContainsFile := false + foundPkg2ContainsFile := false + for _, rel := range result.Relationships { + fromPkg, fromOk := rel.From.(pkg.Package) + if !fromOk { + continue + } + switch toPkg := rel.To.(type) { + case pkg.Package: + if fromPkg.Name == "pkg-2" && toPkg.Name == "pkg-1" && rel.Type == artifact.DependencyOfRelationship { + foundDependsOn = true + } + case file.Location: + if fromPkg.Name == "pkg-1" && toPkg.RealPath == "/src/main.go" && rel.Type == artifact.ContainsRelationship { + foundPkg1ContainsFile = true + } + if fromPkg.Name == "pkg-2" && toPkg.RealPath == "/src/util.go" && rel.Type == artifact.ContainsRelationship { + foundPkg2ContainsFile = true + } + } + } + assert.True(t, foundDependsOn, "expected pkg-2 DependencyOf pkg-1 relationship") 
+ assert.True(t, foundPkg1ContainsFile, "expected pkg-1 Contains /src/main.go relationship") + assert.True(t, foundPkg2ContainsFile, "expected pkg-2 Contains /src/util.go relationship") +} + +func Test_v3licenseInfoToExpression(t *testing.T) { + tests := []struct { + name string + info spdx.AnyLicenseInfo + expected string + }{ + { + name: "listed license", + info: &spdx.ListedLicense{Name: "MIT"}, + expected: "MIT", + }, + { + name: "custom license with ID", + info: &spdx.CustomLicense{ID: "LicenseRef-Custom-1", Name: "Custom License"}, + expected: "LicenseRef-Custom-1", + }, + { + name: "custom license without ID falls back to name", + info: &spdx.CustomLicense{Name: "Custom License"}, + expected: "Custom License", + }, + { + name: "license expression", + info: &spdx.LicenseExpression{LicenseExpression: "MIT AND Apache-2.0"}, + expected: "MIT AND Apache-2.0", + }, + { + name: "or-later operator", + info: &spdx.OrLaterOperator{ + SubjectLicense: &spdx.ListedLicense{Name: "GPL-2.0"}, + }, + expected: "GPL-2.0+", + }, + { + name: "conjunctive license set (AND)", + info: &spdx.ConjunctiveLicenseSet{ + Members: spdx.LicenseInfoList{ + &spdx.ListedLicense{Name: "MIT"}, + &spdx.ListedLicense{Name: "Apache-2.0"}, + }, + }, + expected: "MIT AND Apache-2.0", + }, + { + name: "disjunctive license set (OR)", + info: &spdx.DisjunctiveLicenseSet{ + Members: spdx.LicenseInfoList{ + &spdx.ListedLicense{Name: "MIT"}, + &spdx.ListedLicense{Name: "Apache-2.0"}, + }, + }, + expected: "(MIT OR Apache-2.0)", + }, + { + name: "with-addition operator", + info: &spdx.WithAdditionOperator{ + SubjectExtendableLicense: &spdx.ListedLicense{Name: "GPL-2.0-only"}, + SubjectAddition: &spdx.ListedLicenseException{AdditionText: "Classpath-exception-2.0"}, + }, + expected: "GPL-2.0-only WITH Classpath-exception-2.0", + }, + { + name: "nested: (MIT OR Apache-2.0) AND GPL-2.0", + info: &spdx.ConjunctiveLicenseSet{ + Members: spdx.LicenseInfoList{ + &spdx.DisjunctiveLicenseSet{ + Members: 
spdx.LicenseInfoList{ + &spdx.ListedLicense{Name: "MIT"}, + &spdx.ListedLicense{Name: "Apache-2.0"}, + }, + }, + &spdx.ListedLicense{Name: "GPL-2.0-only"}, + }, + }, + expected: "(MIT OR Apache-2.0) AND GPL-2.0-only", + }, + { + name: "nested: GPL-2.0 WITH Classpath OR MIT", + info: &spdx.DisjunctiveLicenseSet{ + Members: spdx.LicenseInfoList{ + &spdx.WithAdditionOperator{ + SubjectExtendableLicense: &spdx.ListedLicense{Name: "GPL-2.0-only"}, + SubjectAddition: &spdx.ListedLicenseException{AdditionText: "Classpath-exception-2.0"}, + }, + &spdx.ListedLicense{Name: "MIT"}, + }, + }, + expected: "(GPL-2.0-only WITH Classpath-exception-2.0 OR MIT)", + }, + { + name: "nil returns empty", + info: nil, + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := v3licenseInfoToExpression(tt.info) + assert.Equal(t, tt.expected, got) + }) + } +} + +func Test_v3toSyftLicenses(t *testing.T) { + tests := []struct { + name string + licenseType license.Type + licenses []spdx.AnyLicenseInfo + expected []string + }{ + { + name: "simple listed license", + licenseType: license.Declared, + licenses: []spdx.AnyLicenseInfo{ + &spdx.ListedLicense{Name: "MIT"}, + }, + expected: []string{"MIT"}, + }, + { + name: "conjunctive set produces single expression", + licenseType: license.Concluded, + licenses: []spdx.AnyLicenseInfo{ + &spdx.ConjunctiveLicenseSet{ + Members: spdx.LicenseInfoList{ + &spdx.ListedLicense{Name: "MIT"}, + &spdx.ListedLicense{Name: "Apache-2.0"}, + }, + }, + }, + expected: []string{"MIT AND Apache-2.0"}, + }, + { + name: "multiple license infos", + licenseType: license.Declared, + licenses: []spdx.AnyLicenseInfo{ + &spdx.ListedLicense{Name: "MIT"}, + &spdx.ListedLicense{Name: "GPL-2.0-only"}, + }, + expected: []string{"MIT", "GPL-2.0-only"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := v3toSyftLicenses(tt.licenseType, tt.licenses...) 
+ require.Len(t, got, len(tt.expected)) + for i, l := range got { + assert.Equal(t, tt.licenseType, l.Type) + assert.Equal(t, tt.expected[i], l.Value) + } + }) + } +} + +func Test_v3parseSPDXLicenses(t *testing.T) { + p := &spdx.Package{Name: "test-pkg"} + + concludedLicense := &spdx.ListedLicense{Name: "MIT"} + declaredLicense := &spdx.ConjunctiveLicenseSet{ + Members: spdx.LicenseInfoList{ + &spdx.ListedLicense{Name: "Apache-2.0"}, + &spdx.ListedLicense{Name: "BSD-3-Clause"}, + }, + } + + relationships := ptrMap[[]spdx.AnyRelationship]{} + relationships.Set(p, []spdx.AnyRelationship{ + &spdx.Relationship{ + Type: spdx.RelationshipType_HasConcludedLicense, + From: p, + To: spdx.ElementList{concludedLicense}, + }, + &spdx.Relationship{ + Type: spdx.RelationshipType_HasDeclaredLicense, + From: p, + To: spdx.ElementList{declaredLicense}, + }, + }) + + licenses := v3parseSPDXLicenses(relationships, p) + + require.Len(t, licenses, 2) + + var concluded, declared []pkg.License + for _, l := range licenses { + switch l.Type { + case license.Concluded: + concluded = append(concluded, l) + case license.Declared: + declared = append(declared, l) + } + } + + require.Len(t, concluded, 1) + assert.Equal(t, "MIT", concluded[0].Value) + + require.Len(t, declared, 1) + assert.Equal(t, "Apache-2.0 AND BSD-3-Clause", declared[0].Value) +} diff --git a/syft/format/encoders_collection.go b/syft/format/encoders_collection.go index ffe7c59cc..37c5a8975 100644 --- a/syft/format/encoders_collection.go +++ b/syft/format/encoders_collection.go @@ -14,12 +14,22 @@ import ( ) type EncoderCollection struct { - encoders []sbom.FormatEncoder + defaultVersions map[sbom.FormatID]string + encoders []sbom.FormatEncoder } func NewEncoderCollection(encoders ...sbom.FormatEncoder) *EncoderCollection { + defaultVersions := map[sbom.FormatID]string{} + for _, encoder := range encoders { + if defaultVersion, ok := encoder.(defaultVersionIndicator); ok { + if defaultVersion.DefaultVersion() { + 
defaultVersions[encoder.ID()] = encoder.Version() + } + } + } return &EncoderCollection{ - encoders: encoders, + defaultVersions: defaultVersions, + encoders: encoders, } } @@ -77,13 +87,20 @@ func (e EncoderCollection) Get(name string, version string) sbom.FormatEncoder { var mostRecentFormat sbom.FormatEncoder for _, f := range e.encoders { + defaultVersion := e.defaultVersions[f.ID()] log.WithFields("name", f.ID(), "version", f.Version(), "aliases", f.Aliases()).Trace("considering format") names := []string{string(f.ID())} names = append(names, f.Aliases()...) for _, n := range names { - if cleanFormatName(n) == name && versionMatches(f.Version(), version) { - if mostRecentFormat == nil || f.Version() > mostRecentFormat.Version() { - mostRecentFormat = f + if cleanFormatName(n) == name { + // if the name or alias matches, and the requested version is any-version, use the default + if version == sbom.AnyVersion && defaultVersion != "" { + version = defaultVersion + } + if versionMatches(f.Version(), version) { + if mostRecentFormat == nil || f.Version() > mostRecentFormat.Version() { + mostRecentFormat = f + } } } } @@ -141,3 +158,9 @@ func Encode(s sbom.SBOM, f sbom.FormatEncoder) ([]byte, error) { return buff.Bytes(), nil } + +type defaultVersionIndicator interface { + // DefaultVersion a sbom.FormatEncoder may indicate it is the default version to be used + // when no version is specified for the type + DefaultVersion() bool +} diff --git a/syft/format/encoders_collection_test.go b/syft/format/encoders_collection_test.go index 3e8128fb6..3bd5256c8 100644 --- a/syft/format/encoders_collection_test.go +++ b/syft/format/encoders_collection_test.go @@ -4,7 +4,9 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/anchore/syft/syft/format/internal/spdxutil" "github.com/anchore/syft/syft/sbom" ) @@ -109,3 +111,76 @@ func Test_versionMatches(t *testing.T) { }) } } + +func Test_EncoderCollection_Get(t 
*testing.T) { + tests := []struct { + name string + searchName string + searchVersion string + expectedID sbom.FormatID + expectedVersion string + }{ + { + name: "explicit name and version", + searchName: "spdx-json", + searchVersion: "2.3", + expectedID: spdxutil.JSONFormatID, + expectedVersion: "2.3", + }, + { + name: "explicit name without version gets default", + searchName: "spdx-json", + searchVersion: "", + expectedID: spdxutil.JSONFormatID, + expectedVersion: "2.3", + }, + { + name: "alias name with version", + searchName: "spdx", + searchVersion: "2.2", + expectedID: spdxutil.TagValueFormatID, + expectedVersion: "2.2", + }, + { + name: "alias name without version gets default", + searchName: "spdx", + searchVersion: "", + expectedID: spdxutil.TagValueFormatID, + expectedVersion: "2.3", + }, + { + name: "invalid name gets nothing", + searchName: "json-spdx", + searchVersion: "2.3", + expectedID: "", + }, + { + name: "invalid version gets nothing", + searchName: "spdx-json", + searchVersion: "2.0", + expectedID: "", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + config := DefaultEncodersConfig() + // ensure SPDX default is 2.3 for test + config.SPDXJSON.DefaultVersion = "2.3" + encoders, err := config.Encoders() + require.NoError(t, err) + collection := NewEncoderCollection(encoders...) 
+ result := collection.Get(test.searchName, test.searchVersion) + + if test.expectedID != "" { + require.NotNil(t, result, "expected to find encoder but got nil") + if result != nil { + require.Equal(t, test.expectedID, result.ID()) + require.Equal(t, test.expectedVersion, result.Version()) + } + } else { + require.Nil(t, result, "expected nil but found encoder") + } + }) + } +} diff --git a/syft/format/internal/spdxutil/versions.go b/syft/format/internal/spdxutil/versions.go index 5718e5d2c..c7df85d0f 100644 --- a/syft/format/internal/spdxutil/versions.go +++ b/syft/format/internal/spdxutil/versions.go @@ -4,9 +4,10 @@ import ( "github.com/anchore/syft/syft/sbom" ) -const DefaultVersion = "2.3" - const ( + DefaultVersion = "2.3" + V3_0 = "3.0" + JSONFormatID sbom.FormatID = "spdx-json" TagValueFormatID sbom.FormatID = "spdx-tag-value" ) @@ -19,7 +20,10 @@ func SupportedVersions(id sbom.FormatID) []string { if id != JSONFormatID { // JSON format is not supported in v2.1 - return append([]string{"2.1"}, versions...) + versions = append([]string{"2.1"}, versions...) 
+ } else { + // is JSON, v3 only supported in JSON format: + versions = append(versions, V3_0) } return versions diff --git a/syft/format/spdxjson/decoder.go b/syft/format/spdxjson/decoder.go index 5f38ce7ca..01e61ddb5 100644 --- a/syft/format/spdxjson/decoder.go +++ b/syft/format/spdxjson/decoder.go @@ -44,6 +44,10 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) return nil, "", "", fmt.Errorf("unable to seek to start of SPDX JSON SBOM: %+v", err) } + if strings.HasPrefix(version, "3") { + return decodeSpdx3(version, reader) + } + doc, err := spdxJson.Read(reader) if err != nil { return nil, id, version, fmt.Errorf("unable to decode spdx json: %w", err) @@ -62,11 +66,6 @@ func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { return "", "" } - if _, err := reader.Seek(0, io.SeekStart); err != nil { - log.Debugf("unable to seek to start of SPDX JSON SBOM: %+v", err) - return "", "" - } - // Example JSON document // { // "spdxVersion": "SPDX-2.3", @@ -85,7 +84,18 @@ func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { id, version := getFormatInfo(doc.SPDXVersion) if version == "" || id != ID { - // not a spdx json document that we support + // not a spdx 2 json document that we support, check for v3 + + if _, err = reader.Seek(0, io.SeekStart); err != nil { + log.Debugf("unable to seek to start of SPDX JSON SBOM: %+v", err) + return "", "" + } + + id3, version3 := identifySpdx3(reader) + if id3 != "" && version3 != "" { + return id3, version3 + } + return "", "" } diff --git a/syft/format/spdxjson/encoder.go b/syft/format/spdxjson/encoder.go index 25cd699fb..9ce7476ac 100644 --- a/syft/format/spdxjson/encoder.go +++ b/syft/format/spdxjson/encoder.go @@ -9,6 +9,7 @@ import ( "github.com/spdx/tools-golang/spdx/v2/v2_1" "github.com/spdx/tools-golang/spdx/v2/v2_2" "github.com/spdx/tools-golang/spdx/v2/v2_3" + "github.com/spdx/tools-golang/spdx/v3/v3_0" "github.com/anchore/syft/syft/format/common/spdxhelpers" 
"github.com/anchore/syft/syft/format/internal/spdxutil" @@ -22,8 +23,9 @@ func SupportedVersions() []string { } type EncoderConfig struct { - Version string - Pretty bool // don't include spaces and newlines; same as jq -c + Version string + Pretty bool // don't include spaces and newlines; same as jq -c + DefaultVersion string } type encoder struct { @@ -38,8 +40,9 @@ func NewFormatEncoderWithConfig(cfg EncoderConfig) (sbom.FormatEncoder, error) { func DefaultEncoderConfig() EncoderConfig { return EncoderConfig{ - Version: spdxutil.DefaultVersion, - Pretty: false, + DefaultVersion: spdxutil.DefaultVersion, + Version: spdxutil.DefaultVersion, + Pretty: false, } } @@ -68,6 +71,7 @@ func (e encoder) Encode(writer io.Writer, s sbom.SBOM) error { doc := v2_1.Document{} err = convert.Document(latestDoc, &doc) encodeDoc = doc + case "2.2": doc := v2_2.Document{} err = convert.Document(latestDoc, &doc) @@ -77,6 +81,11 @@ func (e encoder) Encode(writer io.Writer, s sbom.SBOM) error { doc := v2_3.Document{} err = convert.Document(latestDoc, &doc) encodeDoc = doc + + case spdxutil.V3_0: + doc := &v3_0.Document{} + err = convert.Document(latestDoc, doc) + encodeDoc = doc default: return fmt.Errorf("unsupported SPDX version %q", e.cfg.Version) } @@ -95,3 +104,7 @@ func (e encoder) Encode(writer io.Writer, s sbom.SBOM) error { return enc.Encode(encodeDoc) } + +func (e encoder) DefaultVersion() bool { + return e.cfg.DefaultVersion == e.cfg.Version +} diff --git a/syft/format/spdxjson/encoder_test.go b/syft/format/spdxjson/encoder_test.go index 6698be490..be4680942 100644 --- a/syft/format/spdxjson/encoder_test.go +++ b/syft/format/spdxjson/encoder_test.go @@ -216,6 +216,10 @@ func redactor(values ...string) testutil.Redactor { // each SBOM reports a unique documentNamespace when generated, this is not useful for snapshot testing `"documentNamespace":\s+"[^"]*"`: `"documentNamespace":"redacted"`, + // spdx3 IDs are URI with the documentnamespace spdxId containing a UID; namespace 
is the equivalent documentNamespace with the same value + `"spdxId":\s+"[^"]*"`: `"spdxId":"https://redacted"`, + `"namespace":\s+"[^"]*"`: `"namespace":"https://redacted/"`, + // the license list will be updated periodically, the value here should not be directly tested in snapshot tests `"licenseListVersion":\s+"[^"]*"`: `"licenseListVersion":"redacted"`, }, @@ -239,12 +243,14 @@ func TestSupportedVersions(t *testing.T) { "2.2": 2, // the source-to-package relationships can be removed since the primaryPackagePurpose info is available in 2.3 "2.3": 0, + "3.0": 0, } pkgCountOffsetPerVersion := map[string]int{ "2.1": 1, // the source is mapped as a package, but cannot distinguish it since the primaryPackagePurpose info is not available until 2.3 "2.2": 1, // the source is mapped as a package, but cannot distinguish it since the primaryPackagePurpose info is not available until 2.3 "2.3": 0, // the source package can be removed since the primaryPackagePurpose info is available + "3.0": 0, } for _, enc := range encs { diff --git a/syft/format/spdxjson/spdx3_decoder.go b/syft/format/spdxjson/spdx3_decoder.go new file mode 100644 index 000000000..47eb7206a --- /dev/null +++ b/syft/format/spdxjson/spdx3_decoder.go @@ -0,0 +1,63 @@ +package spdxjson + +import ( + "encoding/json" + "fmt" + "io" + "regexp" + + "github.com/spdx/tools-golang/spdx/v3/v3_0" + + "github.com/anchore/syft/syft/format/common/spdxhelpers" + "github.com/anchore/syft/syft/format/internal/spdxutil" + "github.com/anchore/syft/syft/sbom" +) + +var spdx3_0contextRegex = spdxContextRegex(spdxutil.V3_0) + +func decodeSpdx3(version string, reader io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { + switch version { + case spdxutil.V3_0: + doc := v3_0.NewDocument(v3_0.ProfileIdentifierType_Software, "", nil, nil) + err := doc.FromJSON(reader) + if err != nil { + return nil, "", "", err + } + sb, err := spdxhelpers.ToSyftModelV3(doc) + return sb, spdxutil.JSONFormatID, spdxutil.V3_0, err + default: + 
// spdxContextRegex returns a pattern matching the SPDX JSON-LD context URL for the given
// minor version (e.g. "3.0"), with or without a trailing patch component (e.g. "3.0.1").
//
// The version is escaped so that its "." is matched literally, and the pattern is anchored
// so that only the context URL itself matches, not a longer string that merely contains it.
func spdxContextRegex(minorVersion string) *regexp.Regexp {
	// today this is "3.0.1", but is likely to be changed to only include the minor version "3.0"
	return regexp.MustCompile(
		"^" +
			regexp.QuoteMeta("https://spdx.org/rdf/"+minorVersion) +
			`(\.\d+)?` +
			regexp.QuoteMeta("/spdx-context.jsonld") +
			"$",
	)
}
They differ because some information is not representable in the SPDX 3.0 model: +// - Locations / FoundBy are not encoded per-package (ignored in the diff below). +// - The package ID is derived from the SPDX element ID rather than the original syft ID +// (the unexported id field is ignored in the diff below). +// - Type and Language are reconstructed from the package URL type. +// - Metadata is only reconstructed for the package types with dedicated handling in the +// SPDX encoder/decoder (apk, deb, rpm, java-archive, go-module); for all other types the +// decoded package carries no metadata. +func TestSPDX3JSONRoundTrip_AllPackageTypes(t *testing.T) { + ctx := context.Background() + + license := func(value string) pkg.LicenseSet { + return pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, value)) + } + cpes := func(value string) []cpe.CPE { + return []cpe.CPE{cpe.Must(value, cpe.DeclaredSource)} + } + + cases := []struct { + name string + input pkg.Package + want pkg.Package + }{ + { + name: "python package (no reconstructed metadata)", + input: pkg.Package{ + Name: "package-python", + Version: "1.0.1", + Type: pkg.PythonPkg, + Language: pkg.Python, + Licenses: license("MIT"), + CPEs: cpes("cpe:2.3:a:python:package-python:1.0.1:*:*:*:*:*:*:*"), + Locations: file.NewLocationSet(file.NewLocation("/python")), + PURL: "pkg:pypi/package-python@1.0.1", + Metadata: pkg.PythonPackage{Name: "package-python", Version: "1.0.1"}, + }, + want: pkg.Package{ + Name: "package-python", + Version: "1.0.1", + Type: pkg.PythonPkg, + Language: pkg.Python, + Licenses: license("MIT"), + CPEs: cpes("cpe:2.3:a:python:package-python:1.0.1:*:*:*:*:*:*:*"), + PURL: "pkg:pypi/package-python@1.0.1", + }, + }, + { + name: "npm package (no reconstructed metadata)", + input: pkg.Package{ + Name: "package-npm", + Version: "2.0.1", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + Licenses: license("Apache-2.0"), + CPEs: cpes("cpe:2.3:a:npm:package-npm:2.0.1:*:*:*:*:*:*:*"), + Locations: 
file.NewLocationSet(file.NewLocation("/npm")), + PURL: "pkg:npm/package-npm@2.0.1", + Metadata: pkg.NpmPackage{Name: "package-npm", Version: "2.0.1"}, + }, + want: pkg.Package{ + Name: "package-npm", + Version: "2.0.1", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + Licenses: license("Apache-2.0"), + CPEs: cpes("cpe:2.3:a:npm:package-npm:2.0.1:*:*:*:*:*:*:*"), + PURL: "pkg:npm/package-npm@2.0.1", + }, + }, + { + name: "apk package", + input: pkg.Package{ + Name: "package-apk", + Version: "3.0", + Type: pkg.ApkPkg, + Licenses: license("GPL-2.0-only"), + CPEs: cpes("cpe:2.3:a:alpine:package-apk:3.0:*:*:*:*:*:*:*"), + Locations: file.NewLocationSet(file.NewLocation("/apk")), + PURL: "pkg:apk/alpine/package-apk@3.0?arch=x86_64&upstream=apk-origin", + Metadata: pkg.ApkDBEntry{ + Package: "package-apk", + OriginPackage: "apk-origin", + Maintainer: "Alpine Maintainer", + Version: "3.0", + Architecture: "x86_64", + Description: "the apk package", + }, + }, + want: pkg.Package{ + Name: "package-apk", + Version: "3.0", + Type: pkg.ApkPkg, + Licenses: license("GPL-2.0-only"), + CPEs: cpes("cpe:2.3:a:alpine:package-apk:3.0:*:*:*:*:*:*:*"), + PURL: "pkg:apk/alpine/package-apk@3.0?arch=x86_64&upstream=apk-origin", + Metadata: pkg.ApkDBEntry{ + Package: "package-apk", + OriginPackage: "apk-origin", + Maintainer: "Alpine Maintainer", + Version: "3.0", + Architecture: "x86_64", + Description: "the apk package", + }, + }, + }, + { + name: "deb package", + input: pkg.Package{ + Name: "package-deb", + Version: "4.0", + Type: pkg.DebPkg, + Licenses: license("LGPL-2.1-only"), + CPEs: cpes("cpe:2.3:a:debian:package-deb:4.0:*:*:*:*:*:*:*"), + Locations: file.NewLocationSet(file.NewLocation("/deb")), + PURL: "pkg:deb/debian/package-deb@4.0?arch=amd64&upstream=deb-src%404.1", + Metadata: pkg.DpkgDBEntry{ + Package: "package-deb", + Source: "deb-src", + Version: "4.0", + SourceVersion: "4.1", + Architecture: "amd64", + Maintainer: "Debian Maintainer", + }, + }, + want: pkg.Package{ + 
Name: "package-deb", + Version: "4.0", + Type: pkg.DebPkg, + Licenses: license("LGPL-2.1-only"), + CPEs: cpes("cpe:2.3:a:debian:package-deb:4.0:*:*:*:*:*:*:*"), + PURL: "pkg:deb/debian/package-deb@4.0?arch=amd64&upstream=deb-src%404.1", + Metadata: pkg.DpkgDBEntry{ + Package: "package-deb", + Source: "deb-src", + Version: "4.0", + SourceVersion: "4.1", + Architecture: "amd64", + Maintainer: "Debian Maintainer", + }, + }, + }, + { + name: "rpm package", + input: pkg.Package{ + Name: "package-rpm", + Version: "5.0", + Type: pkg.RpmPkg, + Licenses: license("BSD-3-Clause"), + CPEs: cpes("cpe:2.3:a:redhat:package-rpm:5.0:*:*:*:*:*:*:*"), + Locations: file.NewLocationSet(file.NewLocation("/rpm")), + PURL: "pkg:rpm/redhat/package-rpm@5.0?arch=x86_64&upstream=rpm-src-5.0", + Metadata: pkg.RpmDBEntry{ + Name: "package-rpm", + Version: "5.0", + Arch: "x86_64", + SourceRpm: "rpm-src-5.0", + Vendor: "RedHat", + }, + }, + want: pkg.Package{ + Name: "package-rpm", + Version: "5.0", + Type: pkg.RpmPkg, + Licenses: license("BSD-3-Clause"), + CPEs: cpes("cpe:2.3:a:redhat:package-rpm:5.0:*:*:*:*:*:*:*"), + PURL: "pkg:rpm/redhat/package-rpm@5.0?arch=x86_64&upstream=rpm-src-5.0", + Metadata: pkg.RpmDBEntry{ + Name: "package-rpm", + Version: "5.0", + Arch: "x86_64", + SourceRpm: "rpm-src-5.0", + Vendor: "RedHat", + }, + }, + }, + { + name: "java package", + input: pkg.Package{ + Name: "package-java", + Version: "6.0", + Type: pkg.JavaPkg, + Language: pkg.Java, + Licenses: license("EPL-2.0"), + CPEs: cpes("cpe:2.3:a:example:package-java:6.0:*:*:*:*:*:*:*"), + Locations: file.NewLocationSet(file.NewLocation("/java")), + PURL: "pkg:maven/com.example/package-java@6.0", + Metadata: pkg.JavaArchive{ + ArchiveDigests: []file.Digest{ + {Algorithm: "sha1", Value: "3b4ab96c371d913e2a88c269844b6c5fb5cbe761"}, + }, + }, + }, + want: pkg.Package{ + Name: "package-java", + Version: "6.0", + Type: pkg.JavaPkg, + Language: pkg.Java, + Licenses: license("EPL-2.0"), + CPEs: 
cpes("cpe:2.3:a:example:package-java:6.0:*:*:*:*:*:*:*"), + PURL: "pkg:maven/com.example/package-java@6.0", + Metadata: pkg.JavaArchive{ + ArchiveDigests: []file.Digest{ + {Algorithm: "sha1", Value: "3b4ab96c371d913e2a88c269844b6c5fb5cbe761"}, + }, + }, + }, + }, + { + name: "go module package", + input: pkg.Package{ + Name: "package-go", + Version: "7.0", + Type: pkg.GoModulePkg, + Language: pkg.Go, + Licenses: license("MPL-2.0"), + CPEs: cpes("cpe:2.3:a:example:package-go:7.0:*:*:*:*:*:*:*"), + Locations: file.NewLocationSet(file.NewLocation("/go")), + PURL: "pkg:golang/example.com/package-go@7.0", + Metadata: pkg.GolangBinaryBuildinfoEntry{ + H1Digest: "h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=", + }, + }, + want: pkg.Package{ + Name: "package-go", + Version: "7.0", + Type: pkg.GoModulePkg, + Language: pkg.Go, + Licenses: license("MPL-2.0"), + CPEs: cpes("cpe:2.3:a:example:package-go:7.0:*:*:*:*:*:*:*"), + PURL: "pkg:golang/example.com/package-go@7.0", + Metadata: pkg.GolangBinaryBuildinfoEntry{ + H1Digest: "h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=", + }, + }, + }, + } + + var inputs []pkg.Package + var want []pkg.Package + for _, c := range cases { + in := c.input + in.SetID() + inputs = append(inputs, in) + want = append(want, c.want) + } + + subject := sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(inputs...), + }, + Descriptor: sbom.Descriptor{ + Name: "syft", + Version: "v0.42.0-bogus", + }, + Source: source.Description{ + Metadata: source.DirectoryMetadata{Path: "/home/app", Base: "/home/app"}, + }, + } + + cfg := DefaultEncoderConfig() + cfg.Pretty = true + cfg.Version = spdxutil.V3_0 + + enc, err := NewFormatEncoderWithConfig(cfg) + require.NoError(t, err) + + var buf bytes.Buffer + require.NoError(t, enc.Encode(&buf, subject)) + + dec := NewFormatDecoder() + + id, version := dec.Identify(bytes.NewReader(buf.Bytes())) + require.Equal(t, ID, id) + require.Equal(t, spdxutil.V3_0, version) + + s, decodeID, decodeVersion, 
err := dec.Decode(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + require.NotNil(t, s) + require.Equal(t, ID, decodeID) + require.Equal(t, spdxutil.V3_0, decodeVersion) + + // the directory source should be recognized as the document root and not surface as a package + require.Equal(t, len(want), s.Artifacts.Packages.PackageCount()) + + got := s.Artifacts.Packages.Sorted() + sortByName(want) + sortByName(got) + + diff := cmp.Diff(want, got, + cmpopts.IgnoreUnexported(pkg.Package{}, pkg.LicenseSet{}, file.LocationSet{}), + cmpopts.IgnoreFields(pkg.Package{}, "Locations", "FoundBy"), + ) + require.Empty(t, diff, "decoded packages differ from expected (-want +got):\n%s", diff) +} + +func sortByName(pkgs []pkg.Package) { + sort.Slice(pkgs, func(i, j int) bool { + return pkgs[i].Name < pkgs[j].Name + }) +} + +// TestSPDX3JSONRoundTrip_FullSBOM encodes a more complete SBOM (a container image source, several +// packages, and relationships between them) as SPDX 3.0 JSON and decodes it back, verifying that +// the source description and the package relationships survive the round trip. 
+func TestSPDX3JSONRoundTrip_FullSBOM(t *testing.T) { + newPkg := func(name string) pkg.Package { + p := pkg.Package{ + Name: name, + Version: "1.0", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + PURL: "pkg:npm/" + name + "@1.0", + Locations: file.NewLocationSet( + file.NewLocation("/" + name), + ), + } + p.SetID() + return p + } + + app := newPkg("app") + lib := newPkg("lib") + dep := newPkg("dep") + + subject := sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(app, lib, dep), + }, + Relationships: []artifact.Relationship{ + // app contains lib + {From: app, To: lib, Type: artifact.ContainsRelationship}, + // dep is a dependency of lib + {From: dep, To: lib, Type: artifact.DependencyOfRelationship}, + }, + Descriptor: sbom.Descriptor{ + Name: "syft", + Version: "v0.42.0-bogus", + }, + Source: source.Description{ + Name: "some-image", + Version: "some-tag", + Supplier: "some-supplier", + Metadata: source.ImageMetadata{ + UserInput: "some-image:some-tag", + ManifestDigest: "sha256:abcdef0123456789", + Architecture: "amd64", + }, + }, + } + + cfg := DefaultEncoderConfig() + cfg.Pretty = true + cfg.Version = spdxutil.V3_0 + + enc, err := NewFormatEncoderWithConfig(cfg) + require.NoError(t, err) + + var buf bytes.Buffer + require.NoError(t, enc.Encode(&buf, subject)) + + dec := NewFormatDecoder() + s, decodeID, decodeVersion, err := dec.Decode(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + require.NotNil(t, s) + require.Equal(t, ID, decodeID) + require.Equal(t, spdxutil.V3_0, decodeVersion) + + // the container image source is decoded as the document root rather than a package + require.Equal(t, 3, s.Artifacts.Packages.PackageCount()) + + // source information survives, and the source is not surfaced as a package + // (the ID is re-derived from the SPDX element ID and is therefore not compared) + require.Equal(t, "some-image", s.Source.Name) + require.Equal(t, "some-tag", s.Source.Version) + require.Equal(t, "some-supplier", 
s.Source.Supplier) + require.IsType(t, source.ImageMetadata{}, s.Source.Metadata) + imageMetadata := s.Source.Metadata.(source.ImageMetadata) + // the user input is reconstructed from the image name and tag (version) + require.Equal(t, "some-image:some-tag", imageMetadata.UserInput) + require.Equal(t, "sha256:abcdef0123456789", imageMetadata.ManifestDigest) + + // relationships between packages survive, including direction + type rel struct { + from string + to string + typ artifact.RelationshipType + } + var got []rel + for _, r := range s.Relationships { + from, fromOk := r.From.(pkg.Package) + to, toOk := r.To.(pkg.Package) + require.True(t, fromOk, "relationship from should be a package: %+v", r.From) + require.True(t, toOk, "relationship to should be a package: %+v", r.To) + got = append(got, rel{from: from.Name, to: to.Name, typ: r.Type}) + } + + require.ElementsMatch(t, []rel{ + {from: "app", to: "lib", typ: artifact.ContainsRelationship}, + {from: "dep", to: "lib", typ: artifact.DependencyOfRelationship}, + }, got) +} diff --git a/syft/format/spdxjson/testdata/identify/3.0.json b/syft/format/spdxjson/testdata/identify/3.0.json new file mode 100644 index 000000000..9b082f57d --- /dev/null +++ b/syft/format/spdxjson/testdata/identify/3.0.json @@ -0,0 +1,141 @@ +{ + "@context" : "https://spdx.org/rdf/3.0.1/spdx-context.jsonld", + "@graph" : [ { + "@id" : "_:creationInfo_0", + "type" : "CreationInfo", + "specVersion" : "3.0.1", + "createdBy" : [ "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd0" ], + "createdUsing" : [ "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/additionalToolSPDXRef-gnrtd2", "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/additionalToolSPDXRef-gnrtd1" ], + "created" : "2021-08-26T01:46:00Z" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd23", + "type" : "Relationship", + "relationshipType" : "hasDeclaredLicense", + "to" : [ 
"https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd8" ], + "from" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd4", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/document0", + "type" : "SpdxDocument", + "dataLicense" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd24", + "rootElement" : [ "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd4" ], + "name" : "hello", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/additionalToolSPDXRef-gnrtd1", + "type" : "Tool", + "name" : "github.com/spdx/tools-golang/builder", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/additionalToolSPDXRef-gnrtd2", + "type" : "Tool", + "name" : "github.com/spdx/tools-golang/idsearcher", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd8", + "type" : "simplelicensing_LicenseExpression", + "simplelicensing_licenseExpression" : "GPL-3.0-or-later", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd20", + "type" : "simplelicensing_LicenseExpression", + "simplelicensing_licenseExpression" : "NOASSERTION", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd24", + "type" : "simplelicensing_LicenseExpression", + "simplelicensing_licenseExpression" : "CC0-1.0", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd14", + "type" : "LifecycleScopedRelationship", + "relationshipType" : "usesTool", + "scope" : "build", + "to" : [ 
"https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd13" ], + "completeness" : "noAssertion", + "from" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd4", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd0", + "type" : "Person", + "externalIdentifier" : [ { + "type" : "ExternalIdentifier", + "identifier" : "steve@swinslow.net", + "externalIdentifierType" : "email" + } ], + "name" : "Steve Winslow", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd6", + "type" : "software_File", + "software_copyrightText" : "Copyright Contributors to the spdx-examples project.", + "verifiedUsing" : [ { + "type" : "Hash", + "algorithm" : "md5", + "hashValue" : "935054fe899ca782e11003bbae5e166c" + }, { + "type" : "Hash", + "algorithm" : "sha1", + "hashValue" : "20862a6d08391d07d09344029533ec644fac6b21" + }, { + "type" : "Hash", + "algorithm" : "sha256", + "hashValue" : "b4e5ca56d1f9110ca94ed0bf4e6d9ac11c2186eb7cd95159c6fdb50e8db5a823" + } ], + "name" : "./src/hello.c", + "software_primaryPurpose" : "source", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd11", + "type" : "software_File", + "software_copyrightText" : "NOASSERTION", + "verifiedUsing" : [ { + "type" : "Hash", + "algorithm" : "sha1", + "hashValue" : "20291a81ef065ff891b537b64d4fdccaf6f5ac02" + }, { + "type" : "Hash", + "algorithm" : "sha256", + "hashValue" : "83a33ff09648bb5fc5272baca88cf2b59fd81ac4cc6817b86998136af368708e" + }, { + "type" : "Hash", + "algorithm" : "md5", + "hashValue" : "08a12c966d776864cc1eb41fd03c3c3d" + } ], + "name" : "./build/hello", + "contentType" : "application/octet-stream", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : 
"https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd13", + "type" : "software_File", + "software_copyrightText" : "NOASSERTION", + "verifiedUsing" : [ { + "type" : "Hash", + "algorithm" : "sha1", + "hashValue" : "69a2e85696fff1865c3f0686d6c3824b59915c80" + }, { + "type" : "Hash", + "algorithm" : "sha256", + "hashValue" : "5da19033ba058e322e21c90e6d6d859c90b1b544e7840859c12cae5da005e79c" + }, { + "type" : "Hash", + "algorithm" : "md5", + "hashValue" : "559424589a4f3f75fd542810473d8bc1" + } ], + "name" : "./src/Makefile", + "software_primaryPurpose" : "source", + "creationInfo" : "_:creationInfo_0" + }, { + "spdxId" : "https://swinslow.net/spdx-examples/example1/hello-v3-specv3/SPDXRef-gnrtd4", + "type" : "software_Package", + "software_copyrightText" : "NOASSERTION", + "software_downloadLocation" : "git+https://github.com/swinslow/spdx-examples.git#example1/content", + "verifiedUsing" : [ { + "type" : "PackageVerificationCode", + "algorithm" : "sha1", + "hashValue" : "9d20237bb72087e87069f96afb41c6ca2fa2a342" + } ], + "name" : "hello", + "creationInfo" : "_:creationInfo_0" + } ] +} \ No newline at end of file diff --git a/test/cli/all_formats_convertible_test.go b/test/cli/all_formats_convertible_test.go index 8b5a27798..dad736913 100644 --- a/test/cli/all_formats_convertible_test.go +++ b/test/cli/all_formats_convertible_test.go @@ -29,6 +29,7 @@ func TestAllFormatsConvertable(t *testing.T) { {to: "spdx-json", from: "cyclonedx-json"}, {to: "cyclonedx-json", from: "syft-json"}, {to: "cyclonedx-json", from: "spdx-json"}, + {to: "spdx-json@3", from: "spdx-tag-value@2.1"}, } for _, test := range tests { diff --git a/test/cli/all_formats_expressible_test.go b/test/cli/all_formats_expressible_test.go index f13d4c883..fe1bd4564 100644 --- a/test/cli/all_formats_expressible_test.go +++ b/test/cli/all_formats_expressible_test.go @@ -65,6 +65,10 @@ func Test_formatVersionsExpressible(t *testing.T) { format: "spdx-json@2.3", assertion: 
assertInOutput(`"spdxVersion":"SPDX-2.3"`), }, + { + format: "spdx-json@3.0", + assertion: assertInOutput(`https://spdx.org/rdf/3.0`), + }, } for _, test := range tests { diff --git a/test/cli/convert_cmd_test.go b/test/cli/convert_cmd_test.go index 944eb4876..04df14910 100644 --- a/test/cli/convert_cmd_test.go +++ b/test/cli/convert_cmd_test.go @@ -28,6 +28,7 @@ func TestConvertCmd(t *testing.T) { }{ {from: "syft-json", to: "spdx-tag-value", expect: mustEncoder(spdxtagvalue.NewFormatEncoderWithConfig(spdxtagvalue.DefaultEncoderConfig()))}, {from: "syft-json", to: "spdx-json", expect: mustEncoder(spdxjson.NewFormatEncoderWithConfig(spdxjson.DefaultEncoderConfig()))}, + {from: "syft-json", to: "spdx-json@3.0", expect: mustEncoder(spdxjson.NewFormatEncoderWithConfig(spdxjson.DefaultEncoderConfig()))}, {from: "syft-json", to: "cyclonedx-json", expect: mustEncoder(cyclonedxjson.NewFormatEncoderWithConfig(cyclonedxjson.DefaultEncoderConfig()))}, {from: "syft-json", to: "cyclonedx-xml", expect: mustEncoder(cyclonedxxml.NewFormatEncoderWithConfig(cyclonedxxml.DefaultEncoderConfig()))}, } diff --git a/test/cli/spdx_json_schema_test.go b/test/cli/spdx_json_schema_test.go index 61c6972a3..4df213c1d 100644 --- a/test/cli/spdx_json_schema_test.go +++ b/test/cli/spdx_json_schema_test.go @@ -31,7 +31,7 @@ func TestSPDXJSONSchema(t *testing.T) { { name: "scan:image:docker-archive:pkg-coverage", subcommand: "scan", - args: []string{"-o", "spdx-json"}, + args: []string{"-o", "spdx-json@2.3"}, fixture: imageFixture, }, { diff --git a/test/cli/spdx_tooling_validation_test.go b/test/cli/spdx_tooling_validation_test.go index 5d1444007..4eaa3ba89 100644 --- a/test/cli/spdx_tooling_validation_test.go +++ b/test/cli/spdx_tooling_validation_test.go @@ -2,23 +2,20 @@ package cli import ( "fmt" - "os" "os/exec" - "path" "path/filepath" "strings" "testing" "github.com/stretchr/testify/require" +) - "github.com/anchore/stereoscope/pkg/imagetest" +const ( + validatorV2 = 
"ghcr.io/spdx/tools-java/tools-java:v1.1.8@sha256:c3b9e848083132e03b30302576b9b51adffd454f43c786f1708cc37c0861a2aa" + validatorV3 = "ghcr.io/spdx/tools-java/tools-java:v2.0.4@sha256:15062f85b4be9688c7bf42df34ad6b84e084ed46e262e1f2dc1603795de9f7b4" ) func TestSpdxValidationTooling(t *testing.T) { - // note: the external tooling requires that the daemon explicitly has the image loaded, not just that - // we can get the image from a cache tar. - imgTag := imagetest.LoadFixtureImageIntoDocker(t, "image-java-spdx-tools") - images := []string{ "alpine:3.17.3@sha256:b6ca290b6b4cdcca5b3db3ffa338ee0285c11744b4a6abaa9627746ee3291d8d", "photon:3.0@sha256:888675e193418d924feea262cf639c46532b63c2027a39fd3ac75383b3c1130e", @@ -32,43 +29,44 @@ func TestSpdxValidationTooling(t *testing.T) { } tests := []struct { - name string - syftArgs []string - images []string - setup func(t *testing.T) - env map[string]string + name string + format string + validator string }{ { - name: "spdx validation tooling tag value", - syftArgs: []string{"scan", "-o", "spdx"}, - images: images, - env: env, + name: "spdx 2.3 validation tooling tag value", + format: "spdx", }, { - name: "spdx validation tooling json", - syftArgs: []string{"scan", "-o", "spdx-json"}, - images: images, - env: env, + name: "spdx 2.3 validation tooling json", + format: "spdx-json", }, { - name: "spdx validation tooling tag value", - syftArgs: []string{"scan", "-o", "spdx@2.2"}, - images: images, - env: env, + name: "spdx 3.0 validation tooling json", + format: "spdx-json@3.0", + validator: validatorV3, }, { - name: "spdx validation tooling json", - syftArgs: []string{"scan", "-o", "spdx-json@2.2"}, - images: images, - env: env, + name: "spdx 2.2 validation tooling tag value", + format: "spdx@2.2", + }, + { + name: "spdx 2.2 validation tooling json", + format: "spdx-json@2.2", }, } - for _, test := range tests { - for _, image := range test.images { - t.Run(test.name+"_"+image, func(t *testing.T) { + for _, image := range 
images { + syftJsonFile := filepath.Join(t.TempDir(), "sbom.syft.json") - args := append(test.syftArgs, image) + cmd, _, stderr := runSyft(t, env, "-o", "syft-json", "--file", syftJsonFile, image) + if cmd.ProcessState.ExitCode() != 0 { + t.Fatalf("failed to run syft: %s", stderr) + } + + for _, test := range tests { + t.Run(test.name+"_"+image, func(t *testing.T) { + t.Parallel() var suffix string if strings.Contains(test.name, "json") { @@ -78,30 +76,27 @@ func TestSpdxValidationTooling(t *testing.T) { } dir := t.TempDir() - sbomPath := filepath.Join(dir, fmt.Sprintf("sbom%s", suffix)) + sbomFile := fmt.Sprintf("sbom%s", suffix) + sbomPath := filepath.Join(dir, sbomFile) - args = append(args, "--file", sbomPath) - - cmd, _, stderr := runSyft(t, test.env, args...) + cmd, _, stderr = runSyft(t, nil, "convert", syftJsonFile, "-o", test.format, "--file", sbomPath) if cmd.ProcessState.ExitCode() != 0 { - t.Fatalf("failed to run syft: %s", stderr) + t.Fatalf("failed to run syft convert: %s", stderr) } - cwd, err := os.Getwd() - require.NoError(t, err) + if test.validator == "" { + test.validator = validatorV2 + } // validate against spdx java tooling - fileArg := fmt.Sprintf("DIR=%s", dir) - mountArg := fmt.Sprintf("BASE=%s", path.Base(sbomPath)) - imageArg := fmt.Sprintf("IMAGE=%s", imgTag) - - validateCmd := exec.Command("make", "validate", fileArg, mountArg, imageArg) - validateCmd.Dir = filepath.Join(cwd, "testdata", "image-java-spdx-tools") + validateCmd := exec.Command("docker", "run", "--rm", "-i", "--platform", "linux/amd64", + "-v", dir+":/data", test.validator, "Verify", "/data/"+sbomFile) stdout, stderr, err := runCommand(validateCmd, map[string]string{}) if err != nil { t.Fatalf("invalid SPDX document:%v\nSTDOUT:\n%s\nSTDERR:\n%s", err, stdout, stderr) } + require.Contains(t, stdout, "SPDX Document is valid") }) } } diff --git a/test/cli/testdata/image-java-spdx-tools/Dockerfile b/test/cli/testdata/image-java-spdx-tools/Dockerfile deleted file mode 100644 
index 938ca24d0..000000000 --- a/test/cli/testdata/image-java-spdx-tools/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM openjdk:11@sha256:e81b7f317654b0f26d3993e014b04bcb29250339b11b9de41e130feecd4cd43c - -RUN wget --no-verbose https://github.com/spdx/tools-java/releases/download/v1.1.3/tools-java-1.1.3.zip && \ - unzip tools-java-1.1.3.zip && \ - rm tools-java-1.1.3.zip - -ENTRYPOINT ["java", "-jar", "tools-java-1.1.3-jar-with-dependencies.jar"] diff --git a/test/cli/testdata/image-java-spdx-tools/Makefile b/test/cli/testdata/image-java-spdx-tools/Makefile deleted file mode 100644 index a6bcc26ab..000000000 --- a/test/cli/testdata/image-java-spdx-tools/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -all: build validate - -IMAGE := "spdx-java-tools:latest" - -.PHONY: build -build: - docker build -t spdx-java-tools:latest . - -validate: - docker run --rm -v $(DIR):/home/build/ $(IMAGE) Verify /home/build/$(BASE)