feat: Add PURL list input/output format (#3853)

Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Keith Zantow 2025-05-12 13:33:24 -04:00 committed by GitHub
parent bea57a4f7d
commit 621d21eb04
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 1079 additions and 60 deletions

View File

@ -12,6 +12,7 @@ import (
"github.com/anchore/syft/syft/format/cyclonedxjson"
"github.com/anchore/syft/syft/format/cyclonedxxml"
"github.com/anchore/syft/syft/format/github"
"github.com/anchore/syft/syft/format/purls"
"github.com/anchore/syft/syft/format/spdxjson"
"github.com/anchore/syft/syft/format/spdxtagvalue"
"github.com/anchore/syft/syft/format/syftjson"
@ -127,6 +128,7 @@ func supportedIDs() []sbom.FormatID {
table.ID,
text.ID,
template.ID,
purls.ID,
// encoders that support multiple versions
cyclonedxxml.ID,

View File

@ -18,6 +18,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/format/internal"
"github.com/anchore/syft/syft/format/internal/spdxutil/helpers"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/linux"
@ -509,6 +510,8 @@ func toSyftPackage(p *spdx.Package) pkg.Package {
Metadata: extractMetadata(p, info),
}
internal.Backfill(sP)
if p.PackageSPDXIdentifier != "" {
// always prefer the IDs from the SBOM over derived IDs
sP.OverrideID(artifact.ID(p.PackageSPDXIdentifier))

View File

@ -40,6 +40,10 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error)
return nil, "", "", fmt.Errorf("unsupported cyclonedx json document version")
}
_, err = reader.Seek(0, io.SeekStart)
if err != nil {
return nil, id, version, fmt.Errorf("unable to seek to start of CycloneDX JSON SBOM: %+v", err)
}
doc, err := d.decoder.Decode(reader)
if err != nil {
return nil, id, version, fmt.Errorf("unable to decode cyclonedx json document: %w", err)

View File

@ -2,6 +2,7 @@ package cyclonedxjson
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
@ -43,6 +44,7 @@ func TestDecoder_Decode(t *testing.T) {
t.Run(test.file, func(t *testing.T) {
reader, err := os.Open(filepath.Join("test-fixtures", test.file))
require.NoError(t, err)
reset := func() { _, err = reader.Seek(0, io.SeekStart); require.NoError(t, err) }
dec := NewFormatDecoder()
@ -51,6 +53,7 @@ func TestDecoder_Decode(t *testing.T) {
assert.Equal(t, sbom.FormatID(""), formatID)
assert.Equal(t, "", formatVersion)
reset()
_, decodeID, decodeVersion, err := dec.Decode(reader)
require.Error(t, err)
assert.Equal(t, sbom.FormatID(""), decodeID)
@ -61,6 +64,7 @@ func TestDecoder_Decode(t *testing.T) {
assert.Equal(t, ID, formatID)
assert.NotEmpty(t, formatVersion)
reset()
bom, decodeID, decodeVersion, err := dec.Decode(reader)
require.NotNil(t, bom)
require.NoError(t, err)

View File

@ -41,6 +41,10 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error)
return nil, "", "", fmt.Errorf("unsupported cyclonedx xml document version")
}
_, err = reader.Seek(0, io.SeekStart)
if err != nil {
return nil, id, version, fmt.Errorf("unable to seek to start of CycloneDX XML SBOM: %w", err)
}
doc, err := d.decoder.Decode(reader)
if err != nil {
return nil, id, version, fmt.Errorf("unable to decode cyclonedx xml document: %w", err)

View File

@ -2,6 +2,7 @@ package cyclonedxxml
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
@ -44,6 +45,8 @@ func TestDecoder_Decode(t *testing.T) {
reader, err := os.Open(filepath.Join("test-fixtures", test.file))
require.NoError(t, err)
reset := func() { _, err = reader.Seek(0, io.SeekStart); require.NoError(t, err) }
dec := NewFormatDecoder()
formatID, formatVersion := dec.Identify(reader)
@ -51,6 +54,7 @@ func TestDecoder_Decode(t *testing.T) {
assert.Equal(t, sbom.FormatID(""), formatID)
assert.Equal(t, "", formatVersion)
reset()
_, decodeID, decodeVersion, err := dec.Decode(reader)
require.Error(t, err)
assert.Equal(t, sbom.FormatID(""), decodeID)
@ -61,6 +65,7 @@ func TestDecoder_Decode(t *testing.T) {
assert.Equal(t, ID, formatID)
assert.NotEmpty(t, formatVersion)
reset()
bom, decodeID, decodeVersion, err := dec.Decode(reader)
require.NotNil(t, bom)
require.NoError(t, err)

View File

@ -5,6 +5,7 @@ import (
"github.com/anchore/syft/syft/format/cyclonedxjson"
"github.com/anchore/syft/syft/format/cyclonedxxml"
"github.com/anchore/syft/syft/format/purls"
"github.com/anchore/syft/syft/format/spdxjson"
"github.com/anchore/syft/syft/format/spdxtagvalue"
"github.com/anchore/syft/syft/format/syftjson"
@ -24,6 +25,7 @@ func Decoders() []sbom.FormatDecoder {
cyclonedxjson.NewFormatDecoder(),
spdxtagvalue.NewFormatDecoder(),
spdxjson.NewFormatDecoder(),
purls.NewFormatDecoder(),
}
}

View File

@ -34,6 +34,10 @@ func (c *DecoderCollection) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, stri
var bestID sbom.FormatID
for _, d := range c.decoders {
_, err = reader.Seek(0, io.SeekStart)
if err != nil {
return nil, "", "", fmt.Errorf("unable to seek to start of SBOM: %w", err)
}
id, version := d.Identify(reader)
if id == "" || version == "" {
if id != "" {
@ -42,6 +46,10 @@ func (c *DecoderCollection) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, stri
continue
}
_, err = reader.Seek(0, io.SeekStart)
if err != nil {
return nil, "", "", fmt.Errorf("unable to seek to start of SBOM: %w", err)
}
return d.Decode(reader)
}
@ -65,6 +73,10 @@ func (c *DecoderCollection) Identify(r io.Reader) (sbom.FormatID, string) {
}
for _, d := range c.decoders {
_, err = reader.Seek(0, io.SeekStart)
if err != nil {
log.Debugf("unable to seek to start of SBOM: %v", err)
}
id, version := d.Identify(reader)
if id != "" && version != "" {
return id, version

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/format/cyclonedxjson"
"github.com/anchore/syft/syft/format/cyclonedxxml"
"github.com/anchore/syft/syft/format/github"
"github.com/anchore/syft/syft/format/purls"
"github.com/anchore/syft/syft/format/spdxjson"
"github.com/anchore/syft/syft/format/spdxtagvalue"
"github.com/anchore/syft/syft/format/syftjson"
@ -62,6 +63,7 @@ func (o EncodersConfig) Encoders() ([]sbom.FormatEncoder, error) {
l.addWithErr(syftjson.ID)(o.syftJSONEncoders())
l.add(table.ID)(table.NewFormatEncoder())
l.add(text.ID)(text.NewFormatEncoder())
l.add(purls.ID)(purls.NewFormatEncoder())
l.add(github.ID)(github.NewFormatEncoder())
l.addWithErr(cyclonedxxml.ID)(o.cyclonedxXMLEncoders())
l.addWithErr(cyclonedxjson.ID)(o.cyclonedxJSONEncoders())

View File

@ -37,6 +37,7 @@ func expectedDefaultEncoders() *strset.Set {
expected.Add("syft-table@") // no version
expected.Add("syft-text@") // no version
expected.Add("github-json@") // no version
expected.Add("purls@") // no version
for _, v := range spdxjson.SupportedVersions() {
expected.Add("spdx-json@" + v)
}
@ -108,6 +109,7 @@ func TestEncodersConfig_Encoders(t *testing.T) {
expected.Add("syft-table@") // no version
expected.Add("syft-text@") // no version
expected.Add("github-json@") // no version
expected.Add("purls@") // no version
expected.Add("spdx-json@" + spdxutil.DefaultVersion)
expected.Add("spdx-tag-value@" + spdxutil.DefaultVersion)
expected.Add("cyclonedx-json@" + cyclonedxutil.DefaultVersion)

View File

@ -0,0 +1,137 @@
package internal
import (
"fmt"
"regexp"
"slices"
"strings"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg"
)
// Backfill takes all information present in the package and attempts to fill in any missing information
// from any available sources, such as the Metadata and PURL.
//
// Type, Language and Name are only populated when they are empty. Version handling is delegated to
// setVersionFromPurl, Java-specific metadata to setJavaMetadataFromPurl, and any CPEs declared in the
// PURL qualifiers are appended to p.CPEs when not already present. A package with an empty or
// unparsable PURL is left untouched.
//
// Backfill does not call p.SetID(), but this needs to be called later to ensure it's up to date
func Backfill(p *pkg.Package) {
	if p.PURL == "" {
		return
	}

	purl, err := packageurl.FromString(p.PURL)
	if err != nil {
		// Debugf (not Debug) so the format verbs are expanded; %w is only meaningful to fmt.Errorf
		log.Debugf("unable to parse purl: %s: %v", p.PURL, err)
		return
	}

	var cpes []cpe.CPE
	epoch := ""

	// collect the qualifiers that influence the package fields
	for _, qualifier := range purl.Qualifiers {
		switch qualifier.Key {
		case pkg.PURLQualifierCPES:
			rawCpes := strings.Split(qualifier.Value, ",")
			for _, rawCpe := range rawCpes {
				c, err := cpe.New(rawCpe, cpe.DeclaredSource)
				if err != nil {
					// a bad CPE is skipped rather than failing the whole package
					log.Debugf("unable to decode cpe %s in purl %s: %v", rawCpe, p.PURL, err)
					continue
				}
				cpes = append(cpes, c)
			}
		case pkg.PURLQualifierEpoch:
			epoch = qualifier.Value
		}
	}

	if p.Type == "" {
		p.Type = pkg.TypeFromPURL(p.PURL)
	}
	if p.Language == "" {
		p.Language = pkg.LanguageFromPURL(p.PURL)
	}
	if p.Name == "" {
		p.Name = nameFromPurl(purl)
	}

	setVersionFromPurl(p, purl, epoch)

	if p.Language == pkg.Java {
		setJavaMetadataFromPurl(p, purl)
	}

	// append only the CPEs the package does not already carry
	for _, c := range cpes {
		if slices.Contains(p.CPEs, c) {
			continue
		}
		p.CPEs = append(p.CPEs, c)
	}
}
// setJavaMetadataFromPurl fills in missing pom properties (artifact ID, group ID, version) on a Java
// package from the given PURL. Nothing happens unless the package type is pkg.JavaPkg, the PURL has a
// namespace, and the package metadata is either unset or a pkg.JavaArchive.
func setJavaMetadataFromPurl(p *pkg.Package, purl packageurl.PackageURL) {
	if p.Type != pkg.JavaPkg || purl.Namespace == "" {
		return
	}

	if p.Metadata == nil {
		p.Metadata = pkg.JavaArchive{}
	}

	archive, isArchive := p.Metadata.(pkg.JavaArchive)
	if !isArchive {
		// metadata of some other type is left alone
		return
	}

	if archive.PomProperties == nil {
		// JavaArchive is held by value, so the updated copy must be stored back on the package
		archive.PomProperties = &pkg.JavaPomProperties{}
		p.Metadata = archive
	}

	// only fields that are still empty are taken from the purl
	props := archive.PomProperties
	if props.ArtifactID == "" {
		props.ArtifactID = purl.Name
	}
	if props.GroupID == "" {
		props.GroupID = purl.Namespace
	}
	if props.Version == "" {
		props.Version = purl.Version
	}
}
// epochPrefix matches a version string that already starts with a numeric epoch, e.g. "1:".
var epochPrefix = regexp.MustCompile(`^\d+:`)

// setVersionFromPurl uses the PURL version when the package has none, then, for RPM packages with a
// non-empty epoch, prefixes the version with "<epoch>:" unless it already begins with an epoch.
func setVersionFromPurl(p *pkg.Package, purl packageurl.PackageURL, epoch string) {
	if p.Version == "" {
		p.Version = purl.Version
	}

	needsEpoch := epoch != "" && p.Type == pkg.RpmPkg
	if !needsEpoch || epochPrefix.MatchString(p.Version) {
		return
	}

	p.Version = fmt.Sprintf("%s:%s", epoch, p.Version)
}
// nameFromPurl derives the syft package name from a PURL. When the PURL has a namespace and its type
// does not exclude namespaces from names, the result is "<namespace>/<name>"; otherwise it is just the name.
func nameFromPurl(purl packageurl.PackageURL) string {
	if purl.Namespace == "" || nameExcludesPurlNamespace(purl.Type) {
		return purl.Name
	}
	return fmt.Sprintf("%s/%s", purl.Namespace, purl.Name)
}
// nameExcludesPurlNamespace reports whether the namespace of a PURL of the given type is left out of
// the syft package name.
func nameExcludesPurlNamespace(purlType string) bool {
	switch purlType {
	case packageurl.TypeAlpine, packageurl.TypeAlpm, packageurl.TypeConan:
		return true
	case packageurl.TypeCpan, packageurl.TypeDebian, packageurl.TypeMaven:
		return true
	case packageurl.TypeQpkg, packageurl.TypeRPM, packageurl.TypeSWID:
		return true
	default:
		return false
	}
}

View File

@ -0,0 +1,177 @@
package internal
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg"
)
// Test_Backfill runs Backfill against packages that only carry a PURL and checks the resulting
// package fields (type, language, name, version, CPEs, Java metadata) for several PURL types.
func Test_Backfill(t *testing.T) {
tests := []struct {
name string
in pkg.Package
expected pkg.Package
}{
{
name: "npm type",
in: pkg.Package{
PURL: "pkg:npm/test@3.0.0",
},
expected: pkg.Package{
PURL: "pkg:npm/test@3.0.0",
Type: pkg.NpmPkg,
Language: pkg.JavaScript,
Name: "test",
Version: "3.0.0",
},
},
{
name: "rpm no epoch",
in: pkg.Package{
PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&upstream=dbus-1.12.8-26.el8.src.rpm",
},
expected: pkg.Package{
PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&upstream=dbus-1.12.8-26.el8.src.rpm",
Type: pkg.RpmPkg,
Name: "dbus-common",
Version: "1.12.8-26.el8",
},
},
{
// the epoch qualifier is expected to be prefixed onto the version
name: "rpm epoch",
in: pkg.Package{
PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&epoch=1&upstream=dbus-1.12.8-26.el8.src.rpm",
},
expected: pkg.Package{
PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&epoch=1&upstream=dbus-1.12.8-26.el8.src.rpm",
Type: pkg.RpmPkg,
Name: "dbus-common",
Version: "1:1.12.8-26.el8",
},
},
{
// a malformed CPE qualifier is expected to be dropped without affecting the other fields
name: "bad cpe",
in: pkg.Package{
PURL: "pkg:npm/testp@3.0.0?cpes=cpe:2.3a:testv:testp:3.0.0:*:*:*:*:*:*:*",
},
expected: pkg.Package{
PURL: "pkg:npm/testp@3.0.0?cpes=cpe:2.3a:testv:testp:3.0.0:*:*:*:*:*:*:*",
Type: pkg.NpmPkg,
Language: pkg.JavaScript,
Name: "testp",
Version: "3.0.0",
},
},
{
name: "good cpe",
in: pkg.Package{
PURL: "pkg:npm/testp@3.0.0?cpes=cpe:2.3:a:testv:testp:3.0.0:*:*:*:*:*:*:*",
},
expected: pkg.Package{
PURL: "pkg:npm/testp@3.0.0?cpes=cpe:2.3:a:testv:testp:3.0.0:*:*:*:*:*:*:*",
Type: pkg.NpmPkg,
Language: pkg.JavaScript,
Name: "testp",
Version: "3.0.0",
CPEs: []cpe.CPE{
{
Attributes: cpe.Attributes{
Part: "a",
Vendor: "testv",
Product: "testp",
Version: "3.0.0",
},
Source: cpe.DeclaredSource,
},
},
},
},
{
// maven purls are expected to populate the pom properties from namespace/name/version
name: "java type",
in: pkg.Package{
PURL: "pkg:maven/org.apache/some-thing@1.2.3",
},
expected: pkg.Package{
PURL: "pkg:maven/org.apache/some-thing@1.2.3",
Type: pkg.JavaPkg,
Language: pkg.Java,
Name: "some-thing",
Version: "1.2.3",
Metadata: pkg.JavaArchive{
PomProperties: &pkg.JavaPomProperties{
GroupID: "org.apache",
ArtifactID: "some-thing",
Version: "1.2.3",
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Backfill(&tt.in)
// set the ID to the empty value before comparing; the expected packages are declared without an ID
tt.in.OverrideID("")
require.Equal(t, tt.expected, tt.in)
})
}
}
// Test_nameFromPurl parses each PURL string and checks the package name produced by nameFromPurl.
// The expectations cover types whose namespace is dropped from the name (maven, deb, rpm, apk) and
// types whose namespace is kept as a "<namespace>/<name>" prefix (docker, npm, oci).
func Test_nameFromPurl(t *testing.T) {
tests := []struct {
in string
expected string
}{
{
in: "pkg:npm/some-name@3.0.0",
expected: "some-name",
},
{
in: "pkg:maven/org.apache/some-name@1.2.3",
expected: "some-name",
},
{
in: "pkg:deb/debian/some-name@3.0.0",
expected: "some-name",
},
{
in: "pkg:rpm/redhat/some-name@3.0.0",
expected: "some-name",
},
{
in: "pkg:gem/some-name@3.0.0",
expected: "some-name",
},
{
in: "pkg:apk/alpine/some-name@3.0.0",
expected: "some-name",
},
{
in: "pkg:docker/some-org/some-name@3.0.0",
expected: "some-org/some-name",
},
{
in: "pkg:npm/some-name@3.0.0",
expected: "some-name",
},
{
in: "pkg:npm/some-org/some-name@3.0.0",
expected: "some-org/some-name",
},
{
in: "pkg:oci/library/mysql@8.1.0",
expected: "library/mysql",
},
}
for _, tt := range tests {
t.Run(tt.in, func(t *testing.T) {
// every input must be a valid purl; only the name derivation is under test
p, err := packageurl.FromString(tt.in)
require.NoError(t, err)
got := nameFromPurl(p)
require.Equal(t, tt.expected, got)
})
}
}

View File

@ -1,12 +1,9 @@
package cyclonedxutil
import (
"fmt"
"io"
"github.com/CycloneDX/cyclonedx-go"
"github.com/anchore/syft/syft/format/internal/stream"
)
type Decoder struct {
@ -20,19 +17,10 @@ func NewDecoder(format cyclonedx.BOMFileFormat) Decoder {
}
func (d Decoder) Decode(r io.Reader) (*cyclonedx.BOM, error) {
reader, err := stream.SeekableReader(r)
if err != nil {
return nil, err
}
doc := &cyclonedx.BOM{
Components: &[]cyclonedx.Component{},
}
if _, err := reader.Seek(0, io.SeekStart); err != nil {
return nil, fmt.Errorf("unable to seek to start of CycloneDX SBOM: %w", err)
}
err = cyclonedx.NewBOMDecoder(reader, d.format).Decode(doc)
err := cyclonedx.NewBOMDecoder(r, d.format).Decode(doc)
if err != nil {
return nil, err
}

View File

@ -1,12 +1,14 @@
package helpers
import (
"fmt"
"reflect"
"github.com/CycloneDX/cyclonedx-go"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/format/internal"
"github.com/anchore/syft/syft/internal/packagemetadata"
"github.com/anchore/syft/syft/pkg"
)
@ -84,7 +86,6 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package {
}
p := &pkg.Package{
Name: c.Name,
Version: c.Version,
Locations: decodeLocations(values),
Licenses: pkg.NewLicenseSet(decodeLicenses(c)...),
@ -102,13 +103,50 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package {
p.Type = pkg.TypeFromPURL(p.PURL)
}
if p.Language == "" {
p.Language = pkg.LanguageFromPURL(p.PURL)
}
setPackageName(p, c)
internal.Backfill(p)
p.SetID()
return p
}
// setPackageName sets p.Name from the CycloneDX component. Without a group the component name is used
// as-is. With a group, Java packages keep the bare component name and have any missing pom properties
// (artifact ID, group ID, version) filled in, while every other package type is named "<group>/<name>".
func setPackageName(p *pkg.Package, c *cyclonedx.Component) {
	if c.Group == "" {
		p.Name = c.Name
		return
	}

	if p.Type != pkg.JavaPkg {
		p.Name = fmt.Sprintf("%s/%s", c.Group, c.Name)
		return
	}

	p.Name = c.Name

	if p.Metadata == nil {
		p.Metadata = pkg.JavaArchive{}
	}

	archive, isArchive := p.Metadata.(pkg.JavaArchive)
	if !isArchive {
		// metadata of another type is not touched
		return
	}

	props := archive.PomProperties
	if props == nil {
		// JavaArchive is held by value, so the updated copy must be stored back on the package
		props = &pkg.JavaPomProperties{}
		archive.PomProperties = props
		p.Metadata = archive
	}

	// only fields that are still empty are populated
	if props.ArtifactID == "" {
		props.ArtifactID = c.Name
	}
	if props.GroupID == "" {
		props.GroupID = c.Group
	}
	if props.Version == "" {
		props.Version = p.Version
	}
}
func decodeLocations(vals map[string]string) file.LocationSet {
v := Decode(reflect.TypeOf([]file.Location{}), vals, "syft:location", CycloneDXFields)
out, ok := v.([]file.Location)

View File

@ -6,13 +6,18 @@ import (
"io"
)
// SeekableReader takes an io.Reader and returns an io.ReadSeeker relative to the current position of the reader.
// Users of this function expect to be able to reset the reader to the current position, not potentially reset the
// reader prior to the location when this reader is provided. An example is a reader with multiple JSON
// documents separated by newlines (JSONL). After reading the first document, if a call is made to decode
// the second and Seek(0, SeekStart) is called it would reset the overall reader back to the first document.
func SeekableReader(reader io.Reader) (io.ReadSeeker, error) {
if reader == nil {
return nil, fmt.Errorf("no bytes provided")
}
if r, ok := reader.(io.ReadSeeker); ok {
return r, nil
return getOffsetReadSeeker(r)
}
content, err := io.ReadAll(reader)
@ -22,3 +27,53 @@ func SeekableReader(reader io.Reader) (io.ReadSeeker, error) {
return bytes.NewReader(content), nil
}
// offsetReadSeeker wraps an io.ReadSeeker so that seek positions are expressed relative to a fixed
// base offset within the wrapped reader instead of relative to the wrapped reader's own start.
type offsetReadSeeker struct {
	rdr    io.ReadSeeker // the wrapped reader
	offset int64         // absolute position in rdr that is treated as position 0
}

// getOffsetReadSeeker returns a new io.ReadSeeker that may wrap another io.ReadSeeker with the current offset, so
// seek calls will be relative to the _current_ position, rather than relative to the reader itself.
//
// When r is already positioned at 0 it is returned unchanged. An error is returned (with a nil reader)
// when r is nil or when its current position cannot be determined.
func getOffsetReadSeeker(r io.ReadSeeker) (io.ReadSeeker, error) {
	if r == nil {
		return nil, fmt.Errorf("no reader provided")
	}
	pos, err := r.Seek(0, io.SeekCurrent)
	if err != nil {
		// the position is meaningless on error, so it must not be used to pick a code path
		return nil, fmt.Errorf("unable to determine current reader position: %w", err)
	}
	if pos == 0 {
		// if the ReadSeeker is currently at 0, we don't need to track an offset
		return r, nil
	}
	return &offsetReadSeeker{
		rdr:    r,
		offset: pos,
	}, nil
}

// Read reads from the wrapped reader at its current position.
func (o *offsetReadSeeker) Read(p []byte) (n int, err error) {
	return o.rdr.Read(p)
}

// Seek moves the wrapped reader and returns the new position relative to the base offset. Only
// io.SeekStart and io.SeekCurrent are supported, and seeking to a position before the base offset is
// an error. On any error the returned position is 0.
func (o *offsetReadSeeker) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case io.SeekStart:
		if offset < 0 {
			return 0, fmt.Errorf("cannot seek < 0")
		}
		newOffset, err := o.rdr.Seek(o.offset+offset, io.SeekStart)
		if err != nil {
			return 0, err
		}
		return newOffset - o.offset, nil
	case io.SeekCurrent:
		// find the current absolute position first so the target can be range-checked before moving
		currentOffset, err := o.rdr.Seek(0, io.SeekCurrent)
		if err != nil {
			return 0, fmt.Errorf("cannot seek current: %w", err)
		}
		if currentOffset-o.offset+offset < 0 {
			return 0, fmt.Errorf("cannot seek < 0")
		}
		newOffset, err := o.rdr.Seek(offset, io.SeekCurrent)
		if err != nil {
			return 0, err
		}
		return newOffset - o.offset, nil
	}
	return 0, fmt.Errorf("only SeekStart and SeekCurrent supported")
}

// compile-time check that offsetReadSeeker satisfies io.ReadSeeker
var _ io.ReadSeeker = (*offsetReadSeeker)(nil)

View File

@ -3,10 +3,9 @@ package stream
import (
"bytes"
"io"
"reflect"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -26,7 +25,7 @@ func TestSeekableReader(t *testing.T) {
name: "empty reader",
input: bytes.NewBuffer([]byte{}), // does not implement io.Seeker (but does implement io.Reader)
assert: func(input io.Reader, got io.ReadSeeker) {
impl, ok := got.(*bytes.Reader) // implements bytes.Reader
impl, ok := got.(*bytes.Reader) // contents are copied to a byte slice, accessed via bytes.Reader
require.True(t, ok)
_, err := impl.Seek(0, io.SeekStart)
require.NoError(t, err)
@ -37,38 +36,28 @@ func TestSeekableReader(t *testing.T) {
},
{
name: "empty read seeker",
input: bytes.NewReader([]byte{}), // implements io.ReadSeeker
input: strings.NewReader(""), // implements io.ReadSeeker, not offset
assert: func(input io.Reader, got io.ReadSeeker) {
impl, ok := got.(*bytes.Reader)
_, ok := got.(*strings.Reader) // same ReadSeeker is returned when not offset
require.True(t, ok)
_, err := impl.Seek(0, io.SeekStart)
_, err := got.Seek(0, io.SeekStart)
require.NoError(t, err)
content, err := io.ReadAll(impl)
content, err := io.ReadAll(got)
require.NoError(t, err)
require.Equal(t, []byte{}, content)
// assert this is the same read seeker (reflect tt.input pointer is the same as the impl pointer
inputImpl, ok := input.(*bytes.Reader)
require.True(t, ok)
assert.Equal(t, reflect.ValueOf(inputImpl).Pointer(), reflect.ValueOf(impl).Pointer())
},
},
{
name: "non-empty read seeker",
input: bytes.NewReader([]byte("hello world!")), // implements io.ReadSeeker
input: strings.NewReader("hello world!"), // implements io.ReadSeeker, not offset
assert: func(input io.Reader, got io.ReadSeeker) {
impl, ok := got.(*bytes.Reader)
_, ok := got.(*strings.Reader) // same ReadSeeker is returned when not offset
require.True(t, ok)
_, err := impl.Seek(0, io.SeekStart)
_, err := got.Seek(0, io.SeekStart)
require.NoError(t, err)
content, err := io.ReadAll(impl)
content, err := io.ReadAll(got)
require.NoError(t, err)
require.Equal(t, []byte("hello world!"), content)
// assert this is the same read seeker (reflect tt.input pointer is the same as the impl pointer
inputImpl, ok := input.(*bytes.Reader)
require.True(t, ok)
assert.Equal(t, reflect.ValueOf(inputImpl).Pointer(), reflect.ValueOf(impl).Pointer())
},
},
{
@ -84,6 +73,32 @@ func TestSeekableReader(t *testing.T) {
require.Equal(t, []byte("hello world!"), content)
},
},
{
name: "position zero read seeker",
input: strings.NewReader("a string reader"), // implements io.ReadSeeker at position 0
assert: func(input io.Reader, got io.ReadSeeker) {
_, ok := got.(*strings.Reader) // returns the same ReadSeeker
require.True(t, ok)
_, err := got.Seek(0, io.SeekStart)
require.NoError(t, err)
content, err := io.ReadAll(got)
require.NoError(t, err)
require.Equal(t, []byte("a string reader"), content)
},
},
{
name: "offset read seeker",
input: moveOffset(t, bytes.NewReader([]byte{1, 2, 3, 4, 5}), 3), // implements io.ReadSeeker, with an offset
assert: func(input io.Reader, got io.ReadSeeker) {
_, ok := got.(*offsetReadSeeker) // returns an offset-tracking ReadSeeker
require.True(t, ok)
_, err := got.Seek(0, io.SeekStart)
require.NoError(t, err)
content, err := io.ReadAll(got)
require.NoError(t, err)
require.Equal(t, []byte{4, 5}, content)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
@ -99,3 +114,113 @@ func TestSeekableReader(t *testing.T) {
})
}
}
// Test_offsetReadSeeker builds an offsetReadSeeker on top of a strings.Reader positioned at various
// offsets, performs one or two Seek calls with the same whence, and checks either that an error is
// returned or that the remaining content read matches the expectation.
func Test_offsetReadSeeker(t *testing.T) {
// reader over "abcd1234" positioned at 0
abcd1234 := func() io.ReadSeeker { return strings.NewReader("abcd1234") }
// reader over "abcd1234" already advanced to the given offset
abcd1234offset := func(offset int) func() io.ReadSeeker {
return func() io.ReadSeeker {
r := strings.NewReader("abcd1234")
_, err := r.Seek(int64(offset), io.SeekStart)
require.NoError(t, err)
return r
}
}
tests := []struct {
name string
input func() io.ReadSeeker
seek int64
seek2 int64
whence int
expected string
wantErr require.ErrorAssertionFunc
}{
{
name: "basic reader",
input: abcd1234,
seek: 0,
whence: io.SeekStart,
expected: "abcd1234",
},
{
name: "basic reader offset",
input: abcd1234offset(1),
seek: 0,
whence: io.SeekStart,
expected: "bcd1234",
},
{
name: "basic reader offset both",
input: abcd1234offset(2),
seek: 2,
whence: io.SeekStart,
expected: "1234",
},
{
name: "basic reader offset seek current",
input: abcd1234offset(1),
seek: -1,
whence: io.SeekCurrent,
wantErr: require.Error, // would be < current, which is an error
},
{
name: "valid negative offset from current",
input: abcd1234offset(1),
seek: 2,
seek2: -1,
whence: io.SeekCurrent,
expected: "cd1234",
},
{
name: "basic reader offset multiple",
input: abcd1234offset(2),
seek: 3,
seek2: 2,
whence: io.SeekCurrent,
expected: "4",
},
{
name: "bad whence",
input: abcd1234,
seek: 1,
whence: io.SeekEnd,
wantErr: require.Error,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rdr := tt.input()
// capture the reader's current absolute position to use as the base offset
off, err := rdr.Seek(0, io.SeekCurrent)
require.NoError(t, err)
// construct new offsetReadSeeker
sr := offsetReadSeeker{rdr: rdr, offset: off}
_, err = sr.Seek(tt.seek, tt.whence)
// a second seek is only performed when seek2 is non-zero; the first must then succeed
if tt.seek2 != 0 {
require.NoError(t, err)
_, err = sr.Seek(tt.seek2, tt.whence)
}
if tt.wantErr != nil {
tt.wantErr(t, err)
return
} else {
require.NoError(t, err)
}
buf := make([]byte, 1024)
n, err := sr.Read(buf)
require.NoError(t, err)
require.Equal(t, tt.expected, string(buf[:n]))
})
}
}
// moveOffset seeks reader to the given absolute offset, fails the test if the seek errors or lands
// elsewhere, and returns the same reader as an io.Reader.
func moveOffset(t *testing.T, reader io.ReadSeeker, offset int64) io.Reader {
pos, err := reader.Seek(offset, io.SeekStart)
require.NoError(t, err)
require.Equal(t, offset, pos)
return reader
}

View File

@ -0,0 +1,93 @@
package purls
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/format/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// compile-time check that decoder satisfies sbom.FormatDecoder
var _ sbom.FormatDecoder = (*decoder)(nil)
// decoder is the sbom.FormatDecoder implementation for the purls format; it carries no state.
type decoder struct{}
// NewFormatDecoder returns a decoder for the purls format.
func NewFormatDecoder() sbom.FormatDecoder {
return decoder{}
}
// Decode converts the PURL list read from r into an SBOM via toSyftModel and reports the purls format
// ID and version alongside it. A nil reader yields an error with empty format ID and version.
func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) {
	if r != nil {
		doc, decodeErr := toSyftModel(r)
		return doc, ID, version, decodeErr
	}
	return nil, "", "", fmt.Errorf("no reader provided")
}
// Identify reports the purls format ID and version when the first meaningful line of r is a valid PURL.
// Blank lines and lines that start with "pkg:" but fail to parse are skipped; the first non-blank line
// without the "pkg:" prefix ends the search. Empty values are returned when r is nil or no valid PURL
// is found.
func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
	if r == nil {
		return "", ""
	}

	lines := bufio.NewScanner(r)
	for lines.Scan() {
		candidate := strings.TrimSpace(lines.Text())

		switch {
		case candidate == "":
			// skip whitespace only lines
			continue
		case !strings.HasPrefix(candidate, "pkg:"):
			// not a purl, so we can't identify the format as a list of purls
			return "", ""
		}

		if _, err := packageurl.FromString(candidate); err != nil {
			log.WithFields("error", err, "line", candidate).Debug("unable to parse purl")
			continue
		}

		return ID, version
	}

	return "", ""
}
// toSyftModel reads r line by line and builds an SBOM whose packages are derived from each valid PURL.
// Blank lines are ignored and lines that do not parse as a PURL are skipped with a debug log. Each
// package is populated from its PURL via internal.Backfill and then assigned an ID.
//
// The SBOM built from the lines read so far is always returned; the error is non-nil when reading
// from r failed (for example a line longer than the scanner's buffer), in which case the SBOM may be
// incomplete.
func toSyftModel(r io.Reader) (*sbom.SBOM, error) {
	var errs []error
	pkgs := pkg.NewCollection()

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		if line == "" {
			continue
		}

		// skip invalid PURLs
		_, err := packageurl.FromString(line)
		if err != nil {
			log.WithFields("error", err, "line", line).Debug("unable to parse purl")
			continue
		}

		p := pkg.Package{
			// name, version and other properties set during Backfill
			PURL: line,
		}
		internal.Backfill(&p)
		p.SetID()
		pkgs.Add(p)
	}

	// Scan returning false may mean a read failure rather than EOF; surface it instead of silently
	// returning a truncated package list
	if err := scanner.Err(); err != nil {
		errs = append(errs, fmt.Errorf("unable to read purl list: %w", err))
	}

	return &sbom.SBOM{
		Artifacts: sbom.Artifacts{
			Packages: pkgs,
		},
	}, errors.Join(errs...)
}

View File

@ -0,0 +1,241 @@
package purls
import (
"bytes"
"regexp"
"slices"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/cmptest"
"github.com/anchore/syft/syft/pkg"
)
// TestDecoder_Decode decodes a single PURL per case with the purls decoder and compares the resulting
// packages (name, version, type, language, metadata) against the expected values.
func TestDecoder_Decode(t *testing.T) {
tests := []struct {
purl string
expected []pkg.Package
}{
{
purl: "pkg:generic/some-package@1.2.3",
expected: []pkg.Package{
{
Name: "some-package",
Type: pkg.UnknownPkg,
Version: "1.2.3",
PURL: "pkg:generic/some-package@1.2.3",
},
},
},
{
purl: "pkg:npm/some-package@1.2.3",
expected: []pkg.Package{
{
Name: "some-package",
Type: pkg.NpmPkg,
Language: pkg.JavaScript,
Version: "1.2.3",
PURL: "pkg:npm/some-package@1.2.3",
},
},
},
{
purl: "pkg:apk/curl@7.61.1",
expected: []pkg.Package{
{
Name: "curl",
Version: "7.61.1",
Type: pkg.ApkPkg,
PURL: "pkg:apk/curl@7.61.1",
},
},
},
{
purl: "pkg:deb/debian/sysv-rc@2.88dsf-59?arch=all&distro=debian-jessie&upstream=sysvinit",
expected: []pkg.Package{
{
Name: "sysv-rc",
Version: "2.88dsf-59",
Type: pkg.DebPkg,
PURL: "pkg:deb/debian/sysv-rc@2.88dsf-59?arch=all&distro=debian-jessie&upstream=sysvinit",
},
},
},
{
purl: "pkg:apk/libcrypto3@3.3.2?upstream=openssl",
expected: []pkg.Package{
{
Name: "libcrypto3",
Version: "3.3.2",
Type: pkg.ApkPkg,
PURL: "pkg:apk/libcrypto3@3.3.2?upstream=openssl",
},
},
},
{
purl: "pkg:apk/libcrypto3@3.3.2?upstream=openssl%403.2.1", // %40 is @
expected: []pkg.Package{
{
Name: "libcrypto3",
Version: "3.3.2",
Type: pkg.ApkPkg,
PURL: "pkg:apk/libcrypto3@3.3.2?upstream=openssl%403.2.1",
},
},
},
{
purl: "pkg:rpm/redhat/systemd-x@239-82.el8_10.2?arch=aarch64&distro=rhel-8.10&upstream=systemd-239-82.el8_10.2.src.rpm",
expected: []pkg.Package{
{
Name: "systemd-x",
Version: "239-82.el8_10.2",
Type: pkg.RpmPkg,
PURL: "pkg:rpm/redhat/systemd-x@239-82.el8_10.2?arch=aarch64&distro=rhel-8.10&upstream=systemd-239-82.el8_10.2.src.rpm",
},
},
},
{
// the epoch qualifier is expected to appear as a prefix on the decoded version
purl: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&epoch=1&upstream=dbus-1.12.8-26.el8.src.rpm",
expected: []pkg.Package{
{
Name: "dbus-common",
Version: "1:1.12.8-26.el8",
Type: pkg.RpmPkg,
PURL: "pkg:rpm/redhat/dbus-common@1.12.8-26.el8?arch=noarch&distro=rhel-8.10&epoch=1&upstream=dbus-1.12.8-26.el8.src.rpm",
},
},
},
{
purl: "pkg:apk/curl@7.61.1?arch=aarch64&distro=alpine-3.20.3",
expected: []pkg.Package{
{
Name: "curl",
Version: "7.61.1",
Type: pkg.ApkPkg,
PURL: "pkg:apk/curl@7.61.1?arch=aarch64&distro=alpine-3.20.3",
},
},
},
{
purl: "pkg:golang/k8s.io/ingress-nginx@v1.11.2",
expected: []pkg.Package{
{
Name: "k8s.io/ingress-nginx",
Version: "v1.11.2",
Type: pkg.GoModulePkg,
Language: pkg.Go,
PURL: "pkg:golang/k8s.io/ingress-nginx@v1.11.2",
},
},
},
{
purl: "pkg:golang/github.com/wazuh/wazuh@v4.5.0",
expected: []pkg.Package{
{
Name: "github.com/wazuh/wazuh",
Version: "v4.5.0",
Type: pkg.GoModulePkg,
PURL: "pkg:golang/github.com/wazuh/wazuh@v4.5.0",
Language: pkg.Go,
},
},
},
{
purl: "pkg:golang/wazuh@v4.5.0",
expected: []pkg.Package{
{
Name: "wazuh",
Version: "v4.5.0",
Type: pkg.GoModulePkg,
PURL: "pkg:golang/wazuh@v4.5.0",
Language: pkg.Go,
},
},
},
{
// maven purls are expected to carry pom properties built from namespace/name/version
purl: "pkg:maven/org.apache/some-pkg@4.11.3",
expected: []pkg.Package{
{
Name: "some-pkg",
Version: "4.11.3",
Type: pkg.JavaPkg,
PURL: "pkg:maven/org.apache/some-pkg@4.11.3",
Language: pkg.Java,
Metadata: pkg.JavaArchive{
PomProperties: &pkg.JavaPomProperties{
GroupID: "org.apache",
ArtifactID: "some-pkg",
Version: "4.11.3",
},
},
},
},
},
}
for _, test := range tests {
t.Run(test.purl, func(t *testing.T) {
dec := NewFormatDecoder()
got, _, _, err := dec.Decode(strings.NewReader(test.purl))
require.NoError(t, err)
if diff := cmp.Diff(test.expected, got.Artifacts.Packages.Sorted(), cmptest.DefaultOptions()...); diff != "" {
t.Errorf("expected packages (-want +got):\n%s", diff)
}
})
}
}
// Test_DecodeEncodeCycle decodes a PURL list, encodes the resulting SBOM back to the purls format, and
// checks that the sorted output lines equal the sorted, whitespace-normalized input lines and that
// every output line is a valid PURL.
func Test_DecodeEncodeCycle(t *testing.T) {
tests := []struct {
name string
input string
}{
{
name: "basic",
input: "pkg:generic/some-package@1.2.3",
},
{
name: "multiple",
input: "pkg:generic/pkg1\npkg:generic/pkg2\n\npkg:npm/@vercel/ncc@2.9.5",
},
{
name: "java",
input: "pkg:maven/org.apache/some-thing@4.11.3",
},
{
name: "leading whitespace",
input: " \n \t pkg:maven/org.apache/some-thing@4.11.3",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dec := NewFormatDecoder()
decodedSBOM, _, _, err := dec.Decode(strings.NewReader(tt.input))
require.NoError(t, err)
var buf bytes.Buffer
enc := NewFormatEncoder()
require.NoError(t, enc.Encode(&buf, *decodedSBOM))
// collapse every run of whitespace in the input to a single newline to get one purl per line
in := strings.TrimSpace(regexp.MustCompile(`\s+`).ReplaceAllString(strings.TrimSpace(tt.input), "\n"))
expected := strings.Split(in, "\n")
slices.Sort(expected)
// both sides are sorted so the comparison does not depend on output order
got := strings.Split(strings.TrimSpace(buf.String()), "\n")
slices.Sort(got)
require.EqualValues(t, expected, got)
for _, item := range got {
// require every result is a valid PURL -- no whitespace lines, etc.
_, err = packageurl.FromString(item)
require.NoError(t, err)
}
})
}
}

View File

@ -0,0 +1,62 @@
package purls
import (
"io"
"strings"
"github.com/scylladb/go-set/strset"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/sbom"
)
// ID is the format identifier of the PURL list format.
const ID sbom.FormatID = "purls"
// version is the format version string used within this package.
const version = "1"
// encoder is the sbom.FormatEncoder implementation for the purls format; it carries no state.
type encoder struct {
}
// NewFormatEncoder returns an encoder for the purls format.
func NewFormatEncoder() sbom.FormatEncoder {
return encoder{}
}
// ID returns the purls format identifier.
func (e encoder) ID() sbom.FormatID {
return ID
}
// Aliases returns the alternative names accepted for this format.
func (e encoder) Aliases() []string {
return []string{
"purl",
}
}
// Version returns sbom.AnyVersion rather than a specific format version.
func (e encoder) Version() string {
return sbom.AnyVersion
}
// Encode writes the PURL of every package in the SBOM to writer, one per line, iterating packages in
// sorted order. Packages with an empty PURL, a PURL that was already written, or a PURL that fails to
// parse are skipped. The first write error is returned.
func (e encoder) Encode(writer io.Writer, s sbom.SBOM) error {
	seen := strset.New()

	for _, p := range s.Artifacts.Packages.Sorted() {
		candidate := strings.TrimSpace(p.PURL)
		if candidate == "" || seen.Has(candidate) {
			continue
		}

		// ensure syft doesn't output invalid PURLs in this format
		if _, err := packageurl.FromString(candidate); err != nil {
			log.Debugf("invalid purl: %q", candidate)
			continue
		}

		seen.Add(candidate)

		// the purl and its line terminator are written as two separate writes
		for _, chunk := range []string{candidate, "\n"} {
			if _, err := writer.Write([]byte(chunk)); err != nil {
				return err
			}
		}
	}

	return nil
}

View File

@ -0,0 +1,60 @@
package purls
import (
"flag"
"testing"
"github.com/anchore/syft/syft/format/internal/testutil"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// updateSnapshot is set by the -update-purls test flag (default false); it is
// passed to the golden snapshot helper as its UpdateSnapshot setting.
var updateSnapshot = flag.Bool(
	"update-purls",
	false,
	"update the *.golden files for purls format",
)
// Test_Encoder encodes a fixed set of packages with the PURL list encoder and
// compares the output against the golden snapshot. The fixture includes a
// duplicate PURL and an invalid PURL.
func Test_Encoder(t *testing.T) {
	packages := pkg.NewCollection(
		pkg.Package{
			Name:     "npmtest",
			Version:  "1.5.1",
			Type:     pkg.NpmPkg,
			Language: pkg.JavaScript,
			PURL:     "pkg:npm/npmtest@1.5.1",
		},
		pkg.Package{
			Name:     "npmtest2",
			Version:  "1.5.1",
			Type:     pkg.NpmPkg,
			Language: pkg.JavaScript,
			PURL:     "pkg:npm/npmtest@1.5.1", // duplicate should not be included
		},
		pkg.Package{
			Name:     "npmtest",
			Version:  "3.1.1",
			Type:     pkg.NpmPkg,
			Language: pkg.JavaScript,
			PURL:     "http://npm/npmtest@3.1.1", // invalid PURL should not be included
		},
		pkg.Package{
			Name:     "javatest",
			Version:  "0.30.1",
			Type:     pkg.JavaPkg,
			Language: pkg.Java,
			PURL:     "pkg:maven/org.apache/javatest@0.30.1",
		},
		pkg.Package{
			Type: pkg.UnknownPkg,
			PURL: "pkg:generic/generic@1.2.3",
		},
	)

	cfg := testutil.EncoderSnapshotTestConfig{
		Subject: sbom.SBOM{
			Artifacts: sbom.Artifacts{
				Packages: packages,
			},
		},
		Format:                      NewFormatEncoder(),
		UpdateSnapshot:              *updateSnapshot,
		PersistRedactionsInSnapshot: true,
		IsJSON:                      false,
	}

	testutil.AssertEncoderAgainstGoldenSnapshot(t, cfg)
}

View File

@ -0,0 +1,3 @@
pkg:generic/generic@1.2.3
pkg:maven/org.apache/javatest@0.30.1
pkg:npm/npmtest@1.5.1

View File

@ -2,6 +2,7 @@ package spdxjson
import (
"fmt"
"io"
"os"
"path/filepath"
"testing"
@ -91,6 +92,8 @@ func TestDecoder_Decode(t *testing.T) {
reader, err := os.Open(filepath.Join("test-fixtures", "spdx", test.name))
require.NoError(t, err)
reset := func() { _, err = reader.Seek(0, io.SeekStart); require.NoError(t, err) }
dec := NewFormatDecoder()
formatID, formatVersion := dec.Identify(reader)
@ -98,6 +101,7 @@ func TestDecoder_Decode(t *testing.T) {
assert.Equal(t, test.id, formatID)
assert.Equal(t, test.version, formatVersion)
reset()
_, decodeID, decodeVersion, err := dec.Decode(reader)
require.Error(t, err)
assert.Equal(t, test.id, decodeID)
@ -108,6 +112,7 @@ func TestDecoder_Decode(t *testing.T) {
assert.Equal(t, test.id, formatID)
assert.Equal(t, test.version, formatVersion)
reset()
s, decodeID, decodeVersion, err := dec.Decode(reader)
require.NoError(t, err)

View File

@ -8,7 +8,6 @@ import (
"github.com/spdx/tools-golang/tagvalue"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/format/common/spdxhelpers"
"github.com/anchore/syft/syft/format/internal/stream"
"github.com/anchore/syft/syft/sbom"
@ -40,8 +39,8 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error)
return nil, "", "", fmt.Errorf("unsupported spdx tag-value document version")
}
if _, err := reader.Seek(0, io.SeekStart); err != nil {
return nil, "", "", fmt.Errorf("unable to seek to start of SPDX Tag-Value SBOM: %+v", err)
if _, err = reader.Seek(0, io.SeekStart); err != nil {
return nil, "", "", fmt.Errorf("unable to seek to start of SPDX Tag-Value SBOM: %w", err)
}
doc, err := tagvalue.Read(reader)
@ -57,13 +56,7 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error)
}
func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
reader, err := stream.SeekableReader(r)
if err != nil {
return "", ""
}
if _, err := reader.Seek(0, io.SeekStart); err != nil {
log.Debugf("unable to seek to start of SPDX Tag-Value SBOM: %+v", err)
if r == nil {
return "", ""
}
@ -72,7 +65,7 @@ func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
// DataLicense: CC0-1.0
// SPDXID: SPDXRef-DOCUMENT
scanner := bufio.NewScanner(reader)
scanner := bufio.NewScanner(r)
scanner.Split(bufio.ScanLines)
var id sbom.FormatID

View File

@ -2,6 +2,7 @@ package spdxtagvalue
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
@ -56,6 +57,10 @@ func TestDecoder_Decode(t *testing.T) {
assert.Equal(t, ID, formatID)
assert.NotEmpty(t, formatVersion)
// reset reader
_, err = reader.Seek(0, io.SeekStart)
require.NoError(t, err)
bom, decodeID, decodeVersion, err := dec.Decode(reader)
require.NotNil(t, bom)
require.NoError(t, err)

View File

@ -53,13 +53,7 @@ func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error)
}
func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
reader, err := stream.SeekableReader(r)
if err != nil {
return "", ""
}
if _, err := reader.Seek(0, io.SeekStart); err != nil {
log.Debugf("unable to seek to start of Syft JSON SBOM: %+v", err)
if r == nil {
return "", ""
}
@ -67,10 +61,10 @@ func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
Schema model.Schema `json:"schema"`
}
dec := json.NewDecoder(reader)
dec := json.NewDecoder(r)
var doc Document
if err = dec.Decode(&doc); err != nil {
if err := dec.Decode(&doc); err != nil {
// maybe not json? maybe not valid? doesn't matter, we won't process it.
return "", ""
}

View File

@ -16,6 +16,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/format/internal"
"github.com/anchore/syft/syft/format/syftjson/model"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/pkg"
@ -351,6 +352,8 @@ func toSyftPackage(p model.Package, idAliases map[string]string) pkg.Package {
Metadata: p.Metadata,
}
internal.Backfill(&out)
// always prefer the IDs from the SBOM over derived IDs
out.OverrideID(artifact.ID(p.ID))

View File

@ -62,7 +62,6 @@
},
{
"type": "library",
"group": "org.example",
"name": "basic-lib",
"version": "1.0"
}

View File

@ -10,6 +10,7 @@ import (
const (
PURLQualifierArch = "arch"
PURLQualifierCPES = "cpes"
PURLQualifierDistro = "distro"
PURLQualifierEpoch = "epoch"
PURLQualifierVCSURL = "vcs_url"