diff --git a/cmd/syft/internal/options/output.go b/cmd/syft/internal/options/output.go index 6d6233b7b..c53ea3ef9 100644 --- a/cmd/syft/internal/options/output.go +++ b/cmd/syft/internal/options/output.go @@ -12,6 +12,7 @@ import ( "github.com/anchore/syft/syft/format/cyclonedxjson" "github.com/anchore/syft/syft/format/cyclonedxxml" "github.com/anchore/syft/syft/format/github" + "github.com/anchore/syft/syft/format/purls" "github.com/anchore/syft/syft/format/spdxjson" "github.com/anchore/syft/syft/format/spdxtagvalue" "github.com/anchore/syft/syft/format/syftjson" @@ -127,6 +128,7 @@ func supportedIDs() []sbom.FormatID { table.ID, text.ID, template.ID, + purls.ID, // encoders that support multiple versions cyclonedxxml.ID, diff --git a/syft/format/common/spdxhelpers/to_syft_model.go b/syft/format/common/spdxhelpers/to_syft_model.go index 5037e5b11..280eba20d 100644 --- a/syft/format/common/spdxhelpers/to_syft_model.go +++ b/syft/format/common/spdxhelpers/to_syft_model.go @@ -18,6 +18,7 @@ import ( "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/format/internal" "github.com/anchore/syft/syft/format/internal/spdxutil/helpers" "github.com/anchore/syft/syft/license" "github.com/anchore/syft/syft/linux" @@ -509,6 +510,8 @@ func toSyftPackage(p *spdx.Package) pkg.Package { Metadata: extractMetadata(p, info), } + internal.Backfill(sP) + if p.PackageSPDXIdentifier != "" { // always prefer the IDs from the SBOM over derived IDs sP.OverrideID(artifact.ID(p.PackageSPDXIdentifier)) diff --git a/syft/format/cyclonedxjson/decoder.go b/syft/format/cyclonedxjson/decoder.go index 4a6279c7a..952d571ae 100644 --- a/syft/format/cyclonedxjson/decoder.go +++ b/syft/format/cyclonedxjson/decoder.go @@ -40,6 +40,10 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) return nil, "", "", fmt.Errorf("unsupported cyclonedx json document version") } + _, err = 
reader.Seek(0, io.SeekStart) + if err != nil { + return nil, id, version, fmt.Errorf("unable to seek to start of CycloneDX JSON SBOM: %+v", err) + } doc, err := d.decoder.Decode(reader) if err != nil { return nil, id, version, fmt.Errorf("unable to decode cyclonedx json document: %w", err) diff --git a/syft/format/cyclonedxjson/decoder_test.go b/syft/format/cyclonedxjson/decoder_test.go index b38ad37ba..90ace0428 100644 --- a/syft/format/cyclonedxjson/decoder_test.go +++ b/syft/format/cyclonedxjson/decoder_test.go @@ -2,6 +2,7 @@ package cyclonedxjson import ( "fmt" + "io" "os" "path/filepath" "strings" @@ -43,6 +44,7 @@ func TestDecoder_Decode(t *testing.T) { t.Run(test.file, func(t *testing.T) { reader, err := os.Open(filepath.Join("test-fixtures", test.file)) require.NoError(t, err) + reset := func() { _, err = reader.Seek(0, io.SeekStart); require.NoError(t, err) } dec := NewFormatDecoder() @@ -51,6 +53,7 @@ func TestDecoder_Decode(t *testing.T) { assert.Equal(t, sbom.FormatID(""), formatID) assert.Equal(t, "", formatVersion) + reset() _, decodeID, decodeVersion, err := dec.Decode(reader) require.Error(t, err) assert.Equal(t, sbom.FormatID(""), decodeID) @@ -61,6 +64,7 @@ func TestDecoder_Decode(t *testing.T) { assert.Equal(t, ID, formatID) assert.NotEmpty(t, formatVersion) + reset() bom, decodeID, decodeVersion, err := dec.Decode(reader) require.NotNil(t, bom) require.NoError(t, err) diff --git a/syft/format/cyclonedxxml/decoder.go b/syft/format/cyclonedxxml/decoder.go index 411d237bd..23f6526c7 100644 --- a/syft/format/cyclonedxxml/decoder.go +++ b/syft/format/cyclonedxxml/decoder.go @@ -41,6 +41,10 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) return nil, "", "", fmt.Errorf("unsupported cyclonedx xml document version") } + _, err = reader.Seek(0, io.SeekStart) + if err != nil { + return nil, id, version, fmt.Errorf("unable to seek to start of CycloneDX XML SBOM: %w", err) + } doc, err := d.decoder.Decode(reader) if err != 
nil { return nil, id, version, fmt.Errorf("unable to decode cyclonedx xml document: %w", err) diff --git a/syft/format/cyclonedxxml/decoder_test.go b/syft/format/cyclonedxxml/decoder_test.go index b0d895141..33f962200 100644 --- a/syft/format/cyclonedxxml/decoder_test.go +++ b/syft/format/cyclonedxxml/decoder_test.go @@ -2,6 +2,7 @@ package cyclonedxxml import ( "fmt" + "io" "os" "path/filepath" "strings" @@ -44,6 +45,8 @@ func TestDecoder_Decode(t *testing.T) { reader, err := os.Open(filepath.Join("test-fixtures", test.file)) require.NoError(t, err) + reset := func() { _, err = reader.Seek(0, io.SeekStart); require.NoError(t, err) } + dec := NewFormatDecoder() formatID, formatVersion := dec.Identify(reader) @@ -51,6 +54,7 @@ func TestDecoder_Decode(t *testing.T) { assert.Equal(t, sbom.FormatID(""), formatID) assert.Equal(t, "", formatVersion) + reset() _, decodeID, decodeVersion, err := dec.Decode(reader) require.Error(t, err) assert.Equal(t, sbom.FormatID(""), decodeID) @@ -61,6 +65,7 @@ func TestDecoder_Decode(t *testing.T) { assert.Equal(t, ID, formatID) assert.NotEmpty(t, formatVersion) + reset() bom, decodeID, decodeVersion, err := dec.Decode(reader) require.NotNil(t, bom) require.NoError(t, err) diff --git a/syft/format/decoders.go b/syft/format/decoders.go index 91a2974ad..6ca1f94a2 100644 --- a/syft/format/decoders.go +++ b/syft/format/decoders.go @@ -5,6 +5,7 @@ import ( "github.com/anchore/syft/syft/format/cyclonedxjson" "github.com/anchore/syft/syft/format/cyclonedxxml" + "github.com/anchore/syft/syft/format/purls" "github.com/anchore/syft/syft/format/spdxjson" "github.com/anchore/syft/syft/format/spdxtagvalue" "github.com/anchore/syft/syft/format/syftjson" @@ -24,6 +25,7 @@ func Decoders() []sbom.FormatDecoder { cyclonedxjson.NewFormatDecoder(), spdxtagvalue.NewFormatDecoder(), spdxjson.NewFormatDecoder(), + purls.NewFormatDecoder(), } } diff --git a/syft/format/decoders_collection.go b/syft/format/decoders_collection.go index 5a0e39a6b..69ffaa1a3 
100644 --- a/syft/format/decoders_collection.go +++ b/syft/format/decoders_collection.go @@ -34,6 +34,10 @@ func (c *DecoderCollection) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, stri var bestID sbom.FormatID for _, d := range c.decoders { + _, err = reader.Seek(0, io.SeekStart) + if err != nil { + return nil, "", "", fmt.Errorf("unable to seek to start of SBOM: %w", err) + } id, version := d.Identify(reader) if id == "" || version == "" { if id != "" { @@ -42,6 +46,10 @@ func (c *DecoderCollection) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, stri continue } + _, err = reader.Seek(0, io.SeekStart) + if err != nil { + return nil, "", "", fmt.Errorf("unable to seek to start of SBOM: %w", err) + } return d.Decode(reader) } @@ -65,6 +73,10 @@ func (c *DecoderCollection) Identify(r io.Reader) (sbom.FormatID, string) { } for _, d := range c.decoders { + _, err = reader.Seek(0, io.SeekStart) + if err != nil { + log.Debugf("unable to seek to start of SBOM: %v", err) + } id, version := d.Identify(reader) if id != "" && version != "" { return id, version diff --git a/syft/format/encoders.go b/syft/format/encoders.go index 8bc38d98e..7d9958859 100644 --- a/syft/format/encoders.go +++ b/syft/format/encoders.go @@ -8,6 +8,7 @@ import ( "github.com/anchore/syft/syft/format/cyclonedxjson" "github.com/anchore/syft/syft/format/cyclonedxxml" "github.com/anchore/syft/syft/format/github" + "github.com/anchore/syft/syft/format/purls" "github.com/anchore/syft/syft/format/spdxjson" "github.com/anchore/syft/syft/format/spdxtagvalue" "github.com/anchore/syft/syft/format/syftjson" @@ -62,6 +63,7 @@ func (o EncodersConfig) Encoders() ([]sbom.FormatEncoder, error) { l.addWithErr(syftjson.ID)(o.syftJSONEncoders()) l.add(table.ID)(table.NewFormatEncoder()) l.add(text.ID)(text.NewFormatEncoder()) + l.add(purls.ID)(purls.NewFormatEncoder()) l.add(github.ID)(github.NewFormatEncoder()) l.addWithErr(cyclonedxxml.ID)(o.cyclonedxXMLEncoders()) 
l.addWithErr(cyclonedxjson.ID)(o.cyclonedxJSONEncoders()) diff --git a/syft/format/encoders_test.go b/syft/format/encoders_test.go index 7595725da..9aa83f52e 100644 --- a/syft/format/encoders_test.go +++ b/syft/format/encoders_test.go @@ -37,6 +37,7 @@ func expectedDefaultEncoders() *strset.Set { expected.Add("syft-table@") // no version expected.Add("syft-text@") // no version expected.Add("github-json@") // no version + expected.Add("purls@") // no version for _, v := range spdxjson.SupportedVersions() { expected.Add("spdx-json@" + v) } @@ -108,6 +109,7 @@ func TestEncodersConfig_Encoders(t *testing.T) { expected.Add("syft-table@") // no version expected.Add("syft-text@") // no version expected.Add("github-json@") // no version + expected.Add("purls@") // no version expected.Add("spdx-json@" + spdxutil.DefaultVersion) expected.Add("spdx-tag-value@" + spdxutil.DefaultVersion) expected.Add("cyclonedx-json@" + cyclonedxutil.DefaultVersion) diff --git a/syft/format/internal/backfill.go b/syft/format/internal/backfill.go new file mode 100644 index 000000000..7bfa0aa2b --- /dev/null +++ b/syft/format/internal/backfill.go @@ -0,0 +1,137 @@ +package internal + +import ( + "fmt" + "regexp" + "slices" + "strings" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/cpe" + "github.com/anchore/syft/syft/pkg" +) + +// Backfill takes all information present in the package and attempts to fill in any missing information +// from any available sources, such as the Metadata and PURL. 
+// +// Backfill does not call p.SetID(); callers must call it afterwards so the ID reflects the backfilled fields +func Backfill(p *pkg.Package) { + if p.PURL == "" { + return + } + + purl, err := packageurl.FromString(p.PURL) + if err != nil { + log.Debugf("unable to parse purl: %s: %v", p.PURL, err) + return + } + + var cpes []cpe.CPE + epoch := "" + + for _, qualifier := range purl.Qualifiers { + switch qualifier.Key { + case pkg.PURLQualifierCPES: + rawCpes := strings.Split(qualifier.Value, ",") + for _, rawCpe := range rawCpes { + c, err := cpe.New(rawCpe, cpe.DeclaredSource) + if err != nil { + log.Debugf("unable to decode cpe %s in purl %s: %v", rawCpe, p.PURL, err) + continue + } + cpes = append(cpes, c) + } + case pkg.PURLQualifierEpoch: + epoch = qualifier.Value + } + } + + if p.Type == "" { + p.Type = pkg.TypeFromPURL(p.PURL) + } + if p.Language == "" { + p.Language = pkg.LanguageFromPURL(p.PURL) + } + if p.Name == "" { + p.Name = nameFromPurl(purl) + } + + setVersionFromPurl(p, purl, epoch) + + if p.Language == pkg.Java { + setJavaMetadataFromPurl(p, purl) + } + + for _, c := range cpes { + if slices.Contains(p.CPEs, c) { + continue + } + p.CPEs = append(p.CPEs, c) + } +} + +func setJavaMetadataFromPurl(p *pkg.Package, purl packageurl.PackageURL) { + if p.Type != pkg.JavaPkg { + return + } + if purl.Namespace != "" { + if p.Metadata == nil { + p.Metadata = pkg.JavaArchive{} + } + meta, got := p.Metadata.(pkg.JavaArchive) + if got && meta.PomProperties == nil { + meta.PomProperties = &pkg.JavaPomProperties{} + p.Metadata = meta + } + if meta.PomProperties != nil { + // capture the maven coordinates (artifact id, group id, version) from the purl when not already set + if meta.PomProperties.ArtifactID == "" { + meta.PomProperties.ArtifactID = purl.Name + } + if meta.PomProperties.GroupID == "" { + meta.PomProperties.GroupID = purl.Namespace + } + if meta.PomProperties.Version == "" { + meta.PomProperties.Version = purl.Version + } + } + } +} + +func setVersionFromPurl(p *pkg.Package, purl 
packageurl.PackageURL, epoch string) { + if p.Version == "" { + p.Version = purl.Version + } + + if epoch != "" && p.Type == pkg.RpmPkg && !epochPrefix.MatchString(p.Version) { + p.Version = fmt.Sprintf("%s:%s", epoch, p.Version) + } +} + +var epochPrefix = regexp.MustCompile(`^\d+:`) + +// nameFromPurl returns the syft package name of the package from the purl. If the purl includes a namespace, +// the name is prefixed as appropriate based on the PURL type +func nameFromPurl(purl packageurl.PackageURL) string { + if !nameExcludesPurlNamespace(purl.Type) && purl.Namespace != "" { + return fmt.Sprintf("%s/%s", purl.Namespace, purl.Name) + } + return purl.Name +} + +func nameExcludesPurlNamespace(purlType string) bool { + switch purlType { + case packageurl.TypeAlpine, + packageurl.TypeAlpm, + packageurl.TypeConan, + packageurl.TypeCpan, + packageurl.TypeDebian, + packageurl.TypeMaven, + packageurl.TypeQpkg, + packageurl.TypeRPM, + packageurl.TypeSWID: + return true + } + return false +} diff --git a/syft/format/internal/backfill_test.go b/syft/format/internal/backfill_test.go new file mode 100644 index 000000000..fa5baf732 --- /dev/null +++ b/syft/format/internal/backfill_test.go @@ -0,0 +1,177 @@ +package internal + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/cpe" + "github.com/anchore/syft/syft/pkg" +) + +func Test_Backfill(t *testing.T) { + tests := []struct { + name string + in pkg.Package + expected pkg.Package + }{ + { + name: "npm type", + in: pkg.Package{ + PURL: "pkg:npm/test@3.0.0", + }, + expected: pkg.Package{ + PURL: "pkg:npm/test@3.0.0", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + Name: "test", + Version: "3.0.0", + }, + }, + { + name: "rpm no epoch", + in: pkg.Package{ + PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&upstream=dbus-1.12.8-26.el8.src.rpm", + }, + expected: pkg.Package{ + PURL: 
"pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&upstream=dbus-1.12.8-26.el8.src.rpm", + Type: pkg.RpmPkg, + Name: "dbus-common", + Version: "1.12.8-26.el8", + }, + }, + { + name: "rpm epoch", + in: pkg.Package{ + PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&epoch=1&upstream=dbus-1.12.8-26.el8.src.rpm", + }, + expected: pkg.Package{ + PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&epoch=1&upstream=dbus-1.12.8-26.el8.src.rpm", + Type: pkg.RpmPkg, + Name: "dbus-common", + Version: "1:1.12.8-26.el8", + }, + }, + { + name: "bad cpe", + in: pkg.Package{ + PURL: "pkg:npm/testp@3.0.0?cpes=cpe:2.3a:testv:testp:3.0.0:*:*:*:*:*:*:*", + }, + expected: pkg.Package{ + PURL: "pkg:npm/testp@3.0.0?cpes=cpe:2.3a:testv:testp:3.0.0:*:*:*:*:*:*:*", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + Name: "testp", + Version: "3.0.0", + }, + }, + { + name: "good cpe", + in: pkg.Package{ + PURL: "pkg:npm/testp@3.0.0?cpes=cpe:2.3:a:testv:testp:3.0.0:*:*:*:*:*:*:*", + }, + expected: pkg.Package{ + PURL: "pkg:npm/testp@3.0.0?cpes=cpe:2.3:a:testv:testp:3.0.0:*:*:*:*:*:*:*", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + Name: "testp", + Version: "3.0.0", + CPEs: []cpe.CPE{ + { + Attributes: cpe.Attributes{ + Part: "a", + Vendor: "testv", + Product: "testp", + Version: "3.0.0", + }, + Source: cpe.DeclaredSource, + }, + }, + }, + }, + { + name: "java type", + in: pkg.Package{ + PURL: "pkg:maven/org.apache/some-thing@1.2.3", + }, + expected: pkg.Package{ + PURL: "pkg:maven/org.apache/some-thing@1.2.3", + Type: pkg.JavaPkg, + Language: pkg.Java, + Name: "some-thing", + Version: "1.2.3", + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.apache", + ArtifactID: "some-thing", + Version: "1.2.3", + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + Backfill(&tt.in) + tt.in.OverrideID("") + require.Equal(t, tt.expected, tt.in) + }) + } +} + 
+func Test_nameFromPurl(t *testing.T) { + tests := []struct { + in string + expected string + }{ + { + in: "pkg:npm/some-name@3.0.0", + expected: "some-name", + }, + { + in: "pkg:maven/org.apache/some-name@1.2.3", + expected: "some-name", + }, + { + in: "pkg:deb/debian/some-name@3.0.0", + expected: "some-name", + }, + { + in: "pkg:rpm/redhat/some-name@3.0.0", + expected: "some-name", + }, + { + in: "pkg:gem/some-name@3.0.0", + expected: "some-name", + }, + { + in: "pkg:apk/alpine/some-name@3.0.0", + expected: "some-name", + }, + { + in: "pkg:docker/some-org/some-name@3.0.0", + expected: "some-org/some-name", + }, + { + in: "pkg:npm/some-name@3.0.0", + expected: "some-name", + }, + { + in: "pkg:npm/some-org/some-name@3.0.0", + expected: "some-org/some-name", + }, + { + in: "pkg:oci/library/mysql@8.1.0", + expected: "library/mysql", + }, + } + for _, tt := range tests { + t.Run(tt.in, func(t *testing.T) { + p, err := packageurl.FromString(tt.in) + require.NoError(t, err) + got := nameFromPurl(p) + require.Equal(t, tt.expected, got) + }) + } +} diff --git a/syft/format/internal/cyclonedxutil/decoder.go b/syft/format/internal/cyclonedxutil/decoder.go index 0eb6c5310..77724a7d3 100644 --- a/syft/format/internal/cyclonedxutil/decoder.go +++ b/syft/format/internal/cyclonedxutil/decoder.go @@ -1,12 +1,9 @@ package cyclonedxutil import ( - "fmt" "io" "github.com/CycloneDX/cyclonedx-go" - - "github.com/anchore/syft/syft/format/internal/stream" ) type Decoder struct { @@ -20,19 +17,10 @@ func NewDecoder(format cyclonedx.BOMFileFormat) Decoder { } func (d Decoder) Decode(r io.Reader) (*cyclonedx.BOM, error) { - reader, err := stream.SeekableReader(r) - if err != nil { - return nil, err - } - doc := &cyclonedx.BOM{ Components: &[]cyclonedx.Component{}, } - if _, err := reader.Seek(0, io.SeekStart); err != nil { - return nil, fmt.Errorf("unable to seek to start of CycloneDX SBOM: %w", err) - } - - err = cyclonedx.NewBOMDecoder(reader, d.format).Decode(doc) + err := 
cyclonedx.NewBOMDecoder(r, d.format).Decode(doc) if err != nil { return nil, err } diff --git a/syft/format/internal/cyclonedxutil/helpers/component.go b/syft/format/internal/cyclonedxutil/helpers/component.go index 526094ac5..ce060d3af 100644 --- a/syft/format/internal/cyclonedxutil/helpers/component.go +++ b/syft/format/internal/cyclonedxutil/helpers/component.go @@ -1,12 +1,14 @@ package helpers import ( + "fmt" "reflect" "github.com/CycloneDX/cyclonedx-go" "github.com/anchore/packageurl-go" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/format/internal" "github.com/anchore/syft/syft/internal/packagemetadata" "github.com/anchore/syft/syft/pkg" ) @@ -84,7 +86,6 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package { } p := &pkg.Package{ - Name: c.Name, Version: c.Version, Locations: decodeLocations(values), Licenses: pkg.NewLicenseSet(decodeLicenses(c)...), @@ -102,13 +103,50 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package { p.Type = pkg.TypeFromPURL(p.PURL) } - if p.Language == "" { - p.Language = pkg.LanguageFromPURL(p.PURL) - } + setPackageName(p, c) + + internal.Backfill(p) + p.SetID() return p } +func setPackageName(p *pkg.Package, c *cyclonedx.Component) { + name := c.Name + if c.Group != "" { + switch p.Type { + case pkg.JavaPkg: + if p.Metadata == nil { + p.Metadata = pkg.JavaArchive{} + } + var pomProperties *pkg.JavaPomProperties + javaMetadata, ok := p.Metadata.(pkg.JavaArchive) + if ok { + pomProperties = javaMetadata.PomProperties + if pomProperties == nil { + pomProperties = &pkg.JavaPomProperties{} + javaMetadata.PomProperties = pomProperties + p.Metadata = javaMetadata + } + } + if pomProperties != nil { + if pomProperties.ArtifactID == "" { + pomProperties.ArtifactID = c.Name + } + if pomProperties.GroupID == "" { + pomProperties.GroupID = c.Group + } + if pomProperties.Version == "" { + pomProperties.Version = p.Version + } + } + default: + name = fmt.Sprintf("%s/%s", c.Group, name) + } + } + p.Name = 
name +} + func decodeLocations(vals map[string]string) file.LocationSet { v := Decode(reflect.TypeOf([]file.Location{}), vals, "syft:location", CycloneDXFields) out, ok := v.([]file.Location) diff --git a/syft/format/internal/stream/seekable_reader.go b/syft/format/internal/stream/seekable_reader.go index e9841892c..a4dd9e060 100644 --- a/syft/format/internal/stream/seekable_reader.go +++ b/syft/format/internal/stream/seekable_reader.go @@ -6,13 +6,18 @@ import ( "io" ) +// SeekableReader takes an io.Reader and returns an io.ReadSeeker relative to the current position of the reader. +// Users of this function expect to be able to reset the reader to the current position, not potentially reset the +// reader prior to the location when this reader is provided. An example is a reader with multiple JSON +// documents separated by newlines (JSONL). After reading the first document, if a call is made to decode +// the second and Seek(0, SeekStart) is called it would reset the overall reader back to the first document. 
func SeekableReader(reader io.Reader) (io.ReadSeeker, error) { if reader == nil { return nil, fmt.Errorf("no bytes provided") } if r, ok := reader.(io.ReadSeeker); ok { - return r, nil + return getOffsetReadSeeker(r) } content, err := io.ReadAll(reader) @@ -22,3 +27,53 @@ func SeekableReader(reader io.Reader) (io.ReadSeeker, error) { return bytes.NewReader(content), nil } + +type offsetReadSeeker struct { + rdr io.ReadSeeker + offset int64 +} + +// getOffsetReadSeeker returns r itself when it is positioned at offset 0; otherwise it wraps r so that seek calls are +// relative to the position r had when it was wrapped. An error is returned if r is nil or its position cannot be read +func getOffsetReadSeeker(r io.ReadSeeker) (io.ReadSeeker, error) { + if r == nil { + return nil, fmt.Errorf("no reader provided") + } + pos, err := r.Seek(0, io.SeekCurrent) + if err != nil { + return nil, fmt.Errorf("unable to determine current reader offset: %w", err) + } + if pos == 0 { + // if the ReadSeeker is currently at 0, we don't need to track an offset + return r, nil + } + return &offsetReadSeeker{rdr: r, offset: pos}, nil +} + +func (o *offsetReadSeeker) Read(p []byte) (n int, err error) { + return o.rdr.Read(p) +} + +func (o *offsetReadSeeker) Seek(offset int64, whence int) (int64, error) { + switch whence { + case io.SeekStart: + if offset < 0 { + return 0, fmt.Errorf("cannot seek < 0") + } + newOffset, err := o.rdr.Seek(o.offset+offset, io.SeekStart) + return newOffset - o.offset, err + case io.SeekCurrent: + currentOffset, err := o.rdr.Seek(0, io.SeekCurrent) + if err != nil { + return 0, fmt.Errorf("cannot seek current: %w", err) + } + if currentOffset-o.offset+offset < 0 { + return 0, fmt.Errorf("cannot seek < 0") + } + newOffset, err := o.rdr.Seek(offset, io.SeekCurrent) + return newOffset - o.offset, err + } + return 0, fmt.Errorf("only SeekStart and SeekCurrent supported") +} + +var _ io.ReadSeeker = (*offsetReadSeeker)(nil) diff --git a/syft/format/internal/stream/seekable_reader_test.go b/syft/format/internal/stream/seekable_reader_test.go index d1bc3577a..035325305 100644 --- 
a/syft/format/internal/stream/seekable_reader_test.go +++ b/syft/format/internal/stream/seekable_reader_test.go @@ -3,10 +3,9 @@ package stream import ( "bytes" "io" - "reflect" + "strings" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -26,7 +25,7 @@ func TestSeekableReader(t *testing.T) { name: "empty reader", input: bytes.NewBuffer([]byte{}), // does not implement io.Seeker (but does implement io.Reader) assert: func(input io.Reader, got io.ReadSeeker) { - impl, ok := got.(*bytes.Reader) // implements bytes.Reader + impl, ok := got.(*bytes.Reader) // contents are copied to a byte slice, accessed via bytes.Reader require.True(t, ok) _, err := impl.Seek(0, io.SeekStart) require.NoError(t, err) @@ -37,38 +36,28 @@ func TestSeekableReader(t *testing.T) { }, { name: "empty read seeker", - input: bytes.NewReader([]byte{}), // implements io.ReadSeeker + input: strings.NewReader(""), // implements io.ReadSeeker, not offset assert: func(input io.Reader, got io.ReadSeeker) { - impl, ok := got.(*bytes.Reader) + _, ok := got.(*strings.Reader) // same ReadSeeker is returned when not offset require.True(t, ok) - _, err := impl.Seek(0, io.SeekStart) + _, err := got.Seek(0, io.SeekStart) require.NoError(t, err) - content, err := io.ReadAll(impl) + content, err := io.ReadAll(got) require.NoError(t, err) require.Equal(t, []byte{}, content) - - // assert this is the same read seeker (reflect tt.input pointer is the same as the impl pointer - inputImpl, ok := input.(*bytes.Reader) - require.True(t, ok) - assert.Equal(t, reflect.ValueOf(inputImpl).Pointer(), reflect.ValueOf(impl).Pointer()) }, }, { name: "non-empty read seeker", - input: bytes.NewReader([]byte("hello world!")), // implements io.ReadSeeker + input: strings.NewReader("hello world!"), // implements io.ReadSeeker, not offset assert: func(input io.Reader, got io.ReadSeeker) { - impl, ok := got.(*bytes.Reader) + _, ok := got.(*strings.Reader) // same ReadSeeker is returned when 
not offset require.True(t, ok) - _, err := impl.Seek(0, io.SeekStart) + _, err := got.Seek(0, io.SeekStart) require.NoError(t, err) - content, err := io.ReadAll(impl) + content, err := io.ReadAll(got) require.NoError(t, err) require.Equal(t, []byte("hello world!"), content) - - // assert this is the same read seeker (reflect tt.input pointer is the same as the impl pointer - inputImpl, ok := input.(*bytes.Reader) - require.True(t, ok) - assert.Equal(t, reflect.ValueOf(inputImpl).Pointer(), reflect.ValueOf(impl).Pointer()) }, }, { @@ -84,6 +73,32 @@ func TestSeekableReader(t *testing.T) { require.Equal(t, []byte("hello world!"), content) }, }, + { + name: "position zero read seeker", + input: strings.NewReader("a string reader"), // implements io.ReadSeeker at position 0 + assert: func(input io.Reader, got io.ReadSeeker) { + _, ok := got.(*strings.Reader) // returns the same ReadSeeker + require.True(t, ok) + _, err := got.Seek(0, io.SeekStart) + require.NoError(t, err) + content, err := io.ReadAll(got) + require.NoError(t, err) + require.Equal(t, []byte("a string reader"), content) + }, + }, + { + name: "offset read seeker", + input: moveOffset(t, bytes.NewReader([]byte{1, 2, 3, 4, 5}), 3), // implements io.ReadSeeker, with an offset + assert: func(input io.Reader, got io.ReadSeeker) { + _, ok := got.(*offsetReadSeeker) // returns an offset-tracking ReadSeeker + require.True(t, ok) + _, err := got.Seek(0, io.SeekStart) + require.NoError(t, err) + content, err := io.ReadAll(got) + require.NoError(t, err) + require.Equal(t, []byte{4, 5}, content) + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -99,3 +114,113 @@ func TestSeekableReader(t *testing.T) { }) } } + +func Test_offsetReadSeeker(t *testing.T) { + abcd1234 := func() io.ReadSeeker { return strings.NewReader("abcd1234") } + abcd1234offset := func(offset int) func() io.ReadSeeker { + return func() io.ReadSeeker { + r := strings.NewReader("abcd1234") + _, err := r.Seek(int64(offset), 
io.SeekStart) + require.NoError(t, err) + return r + } + } + + tests := []struct { + name string + input func() io.ReadSeeker + seek int64 + seek2 int64 + whence int + expected string + wantErr require.ErrorAssertionFunc + }{ + { + name: "basic reader", + input: abcd1234, + seek: 0, + whence: io.SeekStart, + expected: "abcd1234", + }, + { + name: "basic reader offset", + input: abcd1234offset(1), + seek: 0, + whence: io.SeekStart, + expected: "bcd1234", + }, + { + name: "basic reader offset both", + input: abcd1234offset(2), + seek: 2, + whence: io.SeekStart, + expected: "1234", + }, + { + name: "basic reader offset seek current", + input: abcd1234offset(1), + seek: -1, + whence: io.SeekCurrent, + wantErr: require.Error, // would be < current, which is an error + }, + { + name: "valid negative offset from current", + input: abcd1234offset(1), + seek: 2, + seek2: -1, + whence: io.SeekCurrent, + expected: "cd1234", + }, + { + name: "basic reader offset multiple", + input: abcd1234offset(2), + seek: 3, + seek2: 2, + whence: io.SeekCurrent, + expected: "4", + }, + { + name: "bad whence", + input: abcd1234, + seek: 1, + whence: io.SeekEnd, + wantErr: require.Error, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rdr := tt.input() + + off, err := rdr.Seek(0, io.SeekCurrent) + require.NoError(t, err) + + // construct new offsetReadSeeker + sr := offsetReadSeeker{rdr: rdr, offset: off} + + _, err = sr.Seek(tt.seek, tt.whence) + if tt.seek2 != 0 { + require.NoError(t, err) + _, err = sr.Seek(tt.seek2, tt.whence) + } + if tt.wantErr != nil { + tt.wantErr(t, err) + return + } else { + require.NoError(t, err) + } + + buf := make([]byte, 1024) + n, err := sr.Read(buf) + require.NoError(t, err) + require.Equal(t, tt.expected, string(buf[:n])) + }) + } +} + +func moveOffset(t *testing.T, reader io.ReadSeeker, offset int64) io.Reader { + pos, err := reader.Seek(offset, io.SeekStart) + require.NoError(t, err) + require.Equal(t, offset, pos) + return 
reader +} diff --git a/syft/format/purls/decoder.go b/syft/format/purls/decoder.go new file mode 100644 index 000000000..9cce2dcf5 --- /dev/null +++ b/syft/format/purls/decoder.go @@ -0,0 +1,96 @@ +package purls + +import ( + "bufio" + "errors" + "fmt" + "io" + "strings" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/format/internal" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +var _ sbom.FormatDecoder = (*decoder)(nil) + +type decoder struct{} + +func NewFormatDecoder() sbom.FormatDecoder { + return decoder{} +} + +func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { + if r == nil { + return nil, "", "", fmt.Errorf("no reader provided") + } + s, err := toSyftModel(r) + return s, ID, version, err +} + +func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { + if r == nil { + return "", "" + } + + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { + // skip whitespace only lines + continue + } + if strings.HasPrefix(line, "pkg:") { + _, err := packageurl.FromString(line) + if err != nil { + log.WithFields("error", err, "line", line).Debug("unable to parse purl") + continue + } + return ID, version + } + // not a purl, so we can't identify the format as a list of purls + return "", "" + } + + return "", "" +} + +func toSyftModel(r io.Reader) (*sbom.SBOM, error) { + var errs []error + pkgs := pkg.NewCollection() + + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + if line == "" { + continue + } + + // skip invalid PURLs + _, err := packageurl.FromString(line) + if err != nil { + log.WithFields("error", err, "line", line).Debug("unable to parse purl") + continue + } + p := pkg.Package{ + // name, version and other properties set during Backfill + PURL: line, + } + + internal.Backfill(&p) + p.SetID() + pkgs.Add(p) + } + if err := scanner.Err(); err != nil { + errs = append(errs, fmt.Errorf("unable to read purls: %w", err)) + } 
+ + return &sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkgs, + }, + }, errors.Join(errs...) +} diff --git a/syft/format/purls/decoder_test.go b/syft/format/purls/decoder_test.go new file mode 100644 index 000000000..26484ecd4 --- /dev/null +++ b/syft/format/purls/decoder_test.go @@ -0,0 +1,241 @@ +package purls + +import ( + "bytes" + "regexp" + "slices" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/internal/cmptest" + "github.com/anchore/syft/syft/pkg" +) + +func TestDecoder_Decode(t *testing.T) { + tests := []struct { + purl string + expected []pkg.Package + }{ + { + purl: "pkg:generic/some-package@1.2.3", + expected: []pkg.Package{ + { + Name: "some-package", + Type: pkg.UnknownPkg, + Version: "1.2.3", + PURL: "pkg:generic/some-package@1.2.3", + }, + }, + }, + { + purl: "pkg:npm/some-package@1.2.3", + expected: []pkg.Package{ + { + Name: "some-package", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + Version: "1.2.3", + PURL: "pkg:npm/some-package@1.2.3", + }, + }, + }, + { + purl: "pkg:apk/curl@7.61.1", + expected: []pkg.Package{ + { + Name: "curl", + Version: "7.61.1", + Type: pkg.ApkPkg, + PURL: "pkg:apk/curl@7.61.1", + }, + }, + }, + { + purl: "pkg:deb/debian/sysv-rc@2.88dsf-59?arch=all&distro=debian-jessie&upstream=sysvinit", + expected: []pkg.Package{ + { + Name: "sysv-rc", + Version: "2.88dsf-59", + Type: pkg.DebPkg, + PURL: "pkg:deb/debian/sysv-rc@2.88dsf-59?arch=all&distro=debian-jessie&upstream=sysvinit", + }, + }, + }, + { + purl: "pkg:apk/libcrypto3@3.3.2?upstream=openssl", + expected: []pkg.Package{ + { + Name: "libcrypto3", + Version: "3.3.2", + Type: pkg.ApkPkg, + PURL: "pkg:apk/libcrypto3@3.3.2?upstream=openssl", + }, + }, + }, + { + purl: "pkg:apk/libcrypto3@3.3.2?upstream=openssl%403.2.1", // %40 is @ + expected: []pkg.Package{ + { + Name: "libcrypto3", + Version: "3.3.2", + Type: pkg.ApkPkg, + PURL: 
"pkg:apk/libcrypto3@3.3.2?upstream=openssl%403.2.1", + }, + }, + }, + { + purl: "pkg:rpm/redhat/systemd-x@239-82.el8_10.2?arch=aarch64&distro=rhel-8.10&upstream=systemd-239-82.el8_10.2.src.rpm", + expected: []pkg.Package{ + { + Name: "systemd-x", + Version: "239-82.el8_10.2", + Type: pkg.RpmPkg, + PURL: "pkg:rpm/redhat/systemd-x@239-82.el8_10.2?arch=aarch64&distro=rhel-8.10&upstream=systemd-239-82.el8_10.2.src.rpm", + }, + }, + }, + { + purl: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&epoch=1&upstream=dbus-1.12.8-26.el8.src.rpm", + expected: []pkg.Package{ + { + Name: "dbus-common", + Version: "1:1.12.8-26.el8", + Type: pkg.RpmPkg, + PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&epoch=1&upstream=dbus-1.12.8-26.el8.src.rpm", + }, + }, + }, + { + purl: "pkg:apk/curl@7.61.1?arch=aarch64&distro=alpine-3.20.3", + expected: []pkg.Package{ + { + Name: "curl", + Version: "7.61.1", + Type: pkg.ApkPkg, + PURL: "pkg:apk/curl@7.61.1?arch=aarch64&distro=alpine-3.20.3", + }, + }, + }, + { + purl: "pkg:golang/k8s.io/ingress-nginx@v1.11.2", + expected: []pkg.Package{ + { + Name: "k8s.io/ingress-nginx", + Version: "v1.11.2", + Type: pkg.GoModulePkg, + Language: pkg.Go, + PURL: "pkg:golang/k8s.io/ingress-nginx@v1.11.2", + }, + }, + }, + { + purl: "pkg:golang/github.com/wazuh/wazuh@v4.5.0", + expected: []pkg.Package{ + { + Name: "github.com/wazuh/wazuh", + Version: "v4.5.0", + Type: pkg.GoModulePkg, + PURL: "pkg:golang/github.com/wazuh/wazuh@v4.5.0", + Language: pkg.Go, + }, + }, + }, + { + purl: "pkg:golang/wazuh@v4.5.0", + expected: []pkg.Package{ + { + Name: "wazuh", + Version: "v4.5.0", + Type: pkg.GoModulePkg, + PURL: "pkg:golang/wazuh@v4.5.0", + Language: pkg.Go, + }, + }, + }, + { + purl: "pkg:maven/org.apache/some-pkg@4.11.3", + expected: []pkg.Package{ + { + Name: "some-pkg", + Version: "4.11.3", + Type: pkg.JavaPkg, + PURL: "pkg:maven/org.apache/some-pkg@4.11.3", + Language: pkg.Java, + Metadata: pkg.JavaArchive{ + 
PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.apache", + ArtifactID: "some-pkg", + Version: "4.11.3", + }, + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.purl, func(t *testing.T) { + dec := NewFormatDecoder() + got, _, _, err := dec.Decode(strings.NewReader(test.purl)) + require.NoError(t, err) + + if diff := cmp.Diff(test.expected, got.Artifacts.Packages.Sorted(), cmptest.DefaultOptions()...); diff != "" { + t.Errorf("expected packages (-want +got):\n%s", diff) + } + }) + } +} + +func Test_DecodeEncodeCycle(t *testing.T) { + tests := []struct { + name string + input string + }{ + { + name: "basic", + input: "pkg:generic/some-package@1.2.3", + }, + { + name: "multiple", + input: "pkg:generic/pkg1\npkg:generic/pkg2\n\npkg:npm/@vercel/ncc@2.9.5", + }, + { + name: "java", + input: "pkg:maven/org.apache/some-thing@4.11.3", + }, + { + name: "leading whitespace", + input: " \n \t pkg:maven/org.apache/some-thing@4.11.3", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dec := NewFormatDecoder() + decodedSBOM, _, _, err := dec.Decode(strings.NewReader(tt.input)) + require.NoError(t, err) + + var buf bytes.Buffer + enc := NewFormatEncoder() + require.NoError(t, enc.Encode(&buf, *decodedSBOM)) + + in := strings.TrimSpace(regexp.MustCompile(`\s+`).ReplaceAllString(strings.TrimSpace(tt.input), "\n")) + expected := strings.Split(in, "\n") + slices.Sort(expected) + + got := strings.Split(strings.TrimSpace(buf.String()), "\n") + slices.Sort(got) + require.EqualValues(t, expected, got) + + for _, item := range got { + // require every result is a valid PURL -- no whitespace lines, etc. 
+ _, err = packageurl.FromString(item) + require.NoError(t, err) + } + }) + } +} diff --git a/syft/format/purls/encoder.go b/syft/format/purls/encoder.go new file mode 100644 index 000000000..37304ea5d --- /dev/null +++ b/syft/format/purls/encoder.go @@ -0,0 +1,62 @@ +package purls + +import ( + "io" + "strings" + + "github.com/scylladb/go-set/strset" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/sbom" +) + +const ID sbom.FormatID = "purls" +const version = "1" + +type encoder struct { +} + +func NewFormatEncoder() sbom.FormatEncoder { + return encoder{} +} + +func (e encoder) ID() sbom.FormatID { + return ID +} + +func (e encoder) Aliases() []string { + return []string{ + "purl", + } +} + +func (e encoder) Version() string { + return sbom.AnyVersion +} + +func (e encoder) Encode(writer io.Writer, s sbom.SBOM) error { + output := strset.New() + for _, p := range s.Artifacts.Packages.Sorted() { + purl := strings.TrimSpace(p.PURL) + if purl == "" || output.Has(purl) { + continue + } + // ensure syft doesn't output invalid PURLs in this format + _, err := packageurl.FromString(purl) + if err != nil { + log.Debugf("invalid purl: %q", purl) + continue + } + output.Add(purl) + _, err = writer.Write([]byte(purl)) + if err != nil { + return err + } + _, err = writer.Write([]byte("\n")) + if err != nil { + return err + } + } + return nil +} diff --git a/syft/format/purls/encoder_test.go b/syft/format/purls/encoder_test.go new file mode 100644 index 000000000..f16848774 --- /dev/null +++ b/syft/format/purls/encoder_test.go @@ -0,0 +1,60 @@ +package purls + +import ( + "flag" + "testing" + + "github.com/anchore/syft/syft/format/internal/testutil" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +var updateSnapshot = flag.Bool("update-purls", false, "update the *.golden files for purls format") + +func Test_Encoder(t *testing.T) { + pkgs := []pkg.Package{ + { + Name: "npmtest", + 
Version: "1.5.1", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + PURL: "pkg:npm/npmtest@1.5.1", + }, + { + Name: "npmtest2", + Version: "1.5.1", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + PURL: "pkg:npm/npmtest@1.5.1", // duplicate should not be included + }, + { + Name: "npmtest", + Version: "3.1.1", + Type: pkg.NpmPkg, + Language: pkg.JavaScript, + PURL: "http://npm/npmtest@3.1.1", // invalid PURL should not be included + }, + { + Name: "javatest", + Version: "0.30.1", + Type: pkg.JavaPkg, + Language: pkg.Java, + PURL: "pkg:maven/org.apache/javatest@0.30.1", + }, + { + Type: pkg.UnknownPkg, + PURL: "pkg:generic/generic@1.2.3", + }, + } + testutil.AssertEncoderAgainstGoldenSnapshot(t, + testutil.EncoderSnapshotTestConfig{ + Subject: sbom.SBOM{Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(pkgs...), + }}, + Format: NewFormatEncoder(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + }, + ) +} diff --git a/syft/format/purls/test-fixtures/snapshot/Test_Encoder.golden b/syft/format/purls/test-fixtures/snapshot/Test_Encoder.golden new file mode 100644 index 000000000..c8201bf62 --- /dev/null +++ b/syft/format/purls/test-fixtures/snapshot/Test_Encoder.golden @@ -0,0 +1,3 @@ +pkg:generic/generic@1.2.3 +pkg:maven/org.apache/javatest@0.30.1 +pkg:npm/npmtest@1.5.1 diff --git a/syft/format/spdxjson/decoder_test.go b/syft/format/spdxjson/decoder_test.go index feee6bfb5..c200f3add 100644 --- a/syft/format/spdxjson/decoder_test.go +++ b/syft/format/spdxjson/decoder_test.go @@ -2,6 +2,7 @@ package spdxjson import ( "fmt" + "io" "os" "path/filepath" "testing" @@ -91,6 +92,8 @@ func TestDecoder_Decode(t *testing.T) { reader, err := os.Open(filepath.Join("test-fixtures", "spdx", test.name)) require.NoError(t, err) + reset := func() { _, err = reader.Seek(0, io.SeekStart); require.NoError(t, err) } + dec := NewFormatDecoder() formatID, formatVersion := dec.Identify(reader) @@ -98,6 +101,7 @@ func TestDecoder_Decode(t 
*testing.T) { assert.Equal(t, test.id, formatID) assert.Equal(t, test.version, formatVersion) + reset() _, decodeID, decodeVersion, err := dec.Decode(reader) require.Error(t, err) assert.Equal(t, test.id, decodeID) @@ -108,6 +112,7 @@ func TestDecoder_Decode(t *testing.T) { assert.Equal(t, test.id, formatID) assert.Equal(t, test.version, formatVersion) + reset() s, decodeID, decodeVersion, err := dec.Decode(reader) require.NoError(t, err) diff --git a/syft/format/spdxtagvalue/decoder.go b/syft/format/spdxtagvalue/decoder.go index 7b6a6fe8f..beed0e1c9 100644 --- a/syft/format/spdxtagvalue/decoder.go +++ b/syft/format/spdxtagvalue/decoder.go @@ -8,7 +8,6 @@ import ( "github.com/spdx/tools-golang/tagvalue" - "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/format/common/spdxhelpers" "github.com/anchore/syft/syft/format/internal/stream" "github.com/anchore/syft/syft/sbom" @@ -40,8 +39,8 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) return nil, "", "", fmt.Errorf("unsupported spdx tag-value document version") } - if _, err := reader.Seek(0, io.SeekStart); err != nil { - return nil, "", "", fmt.Errorf("unable to seek to start of SPDX Tag-Value SBOM: %+v", err) + if _, err = reader.Seek(0, io.SeekStart); err != nil { + return nil, "", "", fmt.Errorf("unable to seek to start of SPDX Tag-Value SBOM: %w", err) } doc, err := tagvalue.Read(reader) @@ -57,13 +56,7 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) } func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { - reader, err := stream.SeekableReader(r) - if err != nil { - return "", "" - } - - if _, err := reader.Seek(0, io.SeekStart); err != nil { - log.Debugf("unable to seek to start of SPDX Tag-Value SBOM: %+v", err) + if r == nil { return "", "" } @@ -72,7 +65,7 @@ func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { // DataLicense: CC0-1.0 // SPDXID: SPDXRef-DOCUMENT - scanner := bufio.NewScanner(reader) 
+ scanner := bufio.NewScanner(r) scanner.Split(bufio.ScanLines) var id sbom.FormatID diff --git a/syft/format/spdxtagvalue/decoder_test.go b/syft/format/spdxtagvalue/decoder_test.go index 22c54e6f2..edf5b877c 100644 --- a/syft/format/spdxtagvalue/decoder_test.go +++ b/syft/format/spdxtagvalue/decoder_test.go @@ -2,6 +2,7 @@ package spdxtagvalue import ( "fmt" + "io" "os" "path/filepath" "strings" @@ -56,6 +57,10 @@ func TestDecoder_Decode(t *testing.T) { assert.Equal(t, ID, formatID) assert.NotEmpty(t, formatVersion) + // reset reader + _, err = reader.Seek(0, io.SeekStart) + require.NoError(t, err) + bom, decodeID, decodeVersion, err := dec.Decode(reader) require.NotNil(t, bom) require.NoError(t, err) diff --git a/syft/format/syftjson/decoder.go b/syft/format/syftjson/decoder.go index 83e5aa7f1..de9b51433 100644 --- a/syft/format/syftjson/decoder.go +++ b/syft/format/syftjson/decoder.go @@ -53,13 +53,7 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) } func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { - reader, err := stream.SeekableReader(r) - if err != nil { - return "", "" - } - - if _, err := reader.Seek(0, io.SeekStart); err != nil { - log.Debugf("unable to seek to start of Syft JSON SBOM: %+v", err) + if r == nil { return "", "" } @@ -67,10 +61,10 @@ func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { Schema model.Schema `json:"schema"` } - dec := json.NewDecoder(reader) + dec := json.NewDecoder(r) var doc Document - if err = dec.Decode(&doc); err != nil { + if err := dec.Decode(&doc); err != nil { // maybe not json? maybe not valid? doesn't matter, we won't process it. 
return "", "" } diff --git a/syft/format/syftjson/to_syft_model.go b/syft/format/syftjson/to_syft_model.go index da56497fe..503f9ba45 100644 --- a/syft/format/syftjson/to_syft_model.go +++ b/syft/format/syftjson/to_syft_model.go @@ -16,6 +16,7 @@ import ( "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/format/internal" "github.com/anchore/syft/syft/format/syftjson/model" "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" @@ -351,6 +352,8 @@ func toSyftPackage(p model.Package, idAliases map[string]string) pkg.Package { Metadata: p.Metadata, } + internal.Backfill(&out) + // always prefer the IDs from the SBOM over derived IDs out.OverrideID(artifact.ID(p.ID)) diff --git a/syft/pkg/cataloger/java/test-fixtures/graalvm-sbom/micronaut.json b/syft/pkg/cataloger/java/test-fixtures/graalvm-sbom/micronaut.json index ad9c05790..c4aba6303 100644 --- a/syft/pkg/cataloger/java/test-fixtures/graalvm-sbom/micronaut.json +++ b/syft/pkg/cataloger/java/test-fixtures/graalvm-sbom/micronaut.json @@ -62,7 +62,6 @@ }, { "type": "library", - "group": "org.example", "name": "basic-lib", "version": "1.0" } diff --git a/syft/pkg/url.go b/syft/pkg/url.go index b75ee5cfd..ef995f918 100644 --- a/syft/pkg/url.go +++ b/syft/pkg/url.go @@ -10,6 +10,7 @@ import ( const ( PURLQualifierArch = "arch" + PURLQualifierCPES = "cpes" PURLQualifierDistro = "distro" PURLQualifierEpoch = "epoch" PURLQualifierVCSURL = "vcs_url"