persist artifact ID as supplemental package data

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2025-05-07 21:51:58 -04:00
parent 8aaf36b1ad
commit 47cc8b58a7
13 changed files with 186 additions and 52 deletions

View File

@ -499,14 +499,15 @@ func extractPkgInfo(p *spdx.Package) pkgInfo {
func toSyftPackage(p *spdx.Package) pkg.Package {
info := extractPkgInfo(p)
sP := &pkg.Package{
Type: info.typ,
Name: p.PackageName,
Version: p.PackageVersion,
Licenses: pkg.NewLicenseSet(parseSPDXLicenses(p)...),
CPEs: extractCPEs(p),
PURL: purlValue(info.purl),
Language: info.lang,
Metadata: extractMetadata(p, info),
Type: info.typ,
Name: p.PackageName,
Version: p.PackageVersion,
Licenses: pkg.NewLicenseSet(parseSPDXLicenses(p)...),
CPEs: extractCPEs(p),
PURL: purlValue(info.purl),
Language: info.lang,
Metadata: extractMetadata(p, info),
SupplementalData: []any{artifact.ID(p.PackageSPDXIdentifier)},
}
sP.SetID()

View File

@ -589,12 +589,18 @@ func Test_convertToAndFromFormat(t *testing.T) {
got, err := ToSyftModel(doc)
require.NoError(t, err)
for _, p := range got.Artifacts.Packages.Sorted() {
// all decoders should be setting an ID of sorts here (another test will verify the correctness of the value)
assert.NotEmpty(t, p.SupplementalData)
}
if diff := cmp.Diff(&s, got,
cmpopts.IgnoreUnexported(artifact.Relationship{}),
cmpopts.IgnoreUnexported(file.LocationSet{}),
cmpopts.IgnoreUnexported(pkg.Collection{}),
cmpopts.IgnoreUnexported(pkg.Package{}),
cmpopts.IgnoreUnexported(pkg.LicenseSet{}),
cmpopts.IgnoreFields(pkg.Package{}, "SupplementalData"), // this is used by decoders to store additional data from the original format
cmpopts.IgnoreFields(sbom.Artifacts{}, "FileMetadata", "FileDigests"),
); diff != "" {
t.Fatalf("packages do not match:\n%s", diff)
@ -664,7 +670,7 @@ func Test_directPackageFiles(t *testing.T) {
Packages: []*spdx.Package{
{
PackageName: "some-package",
PackageSPDXIdentifier: "1",
PackageSPDXIdentifier: "1", // important!
PackageVersion: "1.0.5",
Files: []*spdx.File{
{
@ -686,10 +692,11 @@ func Test_directPackageFiles(t *testing.T) {
require.NoError(t, err)
p := pkg.Package{
Name: "some-package",
Version: "1.0.5",
Name: "some-package",
Version: "1.0.5",
SupplementalData: []any{artifact.ID("1")}, // set by the decoders from the original element ID
}
p.SetID()
p.OverrideID("1") // the same as the spdxID on the package element
f := file.Location{
LocationData: file.LocationData{
Coordinates: file.Coordinates{

View File

@ -3,6 +3,7 @@ package cyclonedxjson
import (
"bytes"
"flag"
"regexp"
"strings"
"testing"
@ -116,6 +117,14 @@ func TestCycloneDxImageEncoder(t *testing.T) {
func redactor(values ...string) testutil.Redactor {
return testutil.NewRedactions().
WithValuesRedacted(values...).
WithPatternRedactorSpec(
testutil.PatternReplacement{
// only the source component bom-ref (not package or other component bom-refs)
Search: regexp.MustCompile(`"component": \{[^}]*"bom-ref":\s*"(?P<redact>.+)"[^}]*}`),
Groups: []string{"redact"}, // use the regex to anchore the search, but only replace bytes within the capture group
Replace: "redacted",
},
).
WithPatternRedactors(
map[string]string{
// UUIDs
@ -126,9 +135,6 @@ func redactor(values ...string) testutil.Redactor {
// image hashes
`sha256:[A-Fa-f0-9]{64}`: `sha256:redacted`,
// BOM refs
`"bom-ref":\s*"[^"]+"`: `"bom-ref":"redacted"`,
},
)
}

View File

@ -17,14 +17,14 @@
]
},
"component": {
"bom-ref":"redacted",
"bom-ref": "redacted",
"type": "file",
"name": "some/path"
}
},
"components": [
{
"bom-ref":"redacted",
"bom-ref": "4dd25c6ee16b729a",
"type": "library",
"name": "package-1",
"version": "1.0.1",
@ -61,7 +61,7 @@
]
},
{
"bom-ref":"redacted",
"bom-ref": "pkg:deb/debian/package-2@2.0.1?package-id=39392bb5e270f669",
"type": "library",
"name": "package-2",
"version": "2.0.1",
@ -91,7 +91,7 @@
]
},
{
"bom-ref":"redacted",
"bom-ref": "os:debian@1.2.3",
"type": "operating-system",
"name": "debian",
"version": "1.2.3",

View File

@ -17,7 +17,7 @@
]
},
"component": {
"bom-ref":"redacted",
"bom-ref": "redacted",
"type": "container",
"name": "user-image-input",
"version": "sha256:redacted"
@ -25,7 +25,7 @@
},
"components": [
{
"bom-ref":"redacted",
"bom-ref": "72567175418f73f8",
"type": "library",
"name": "package-1",
"version": "1.0.1",
@ -66,7 +66,7 @@
]
},
{
"bom-ref":"redacted",
"bom-ref": "pkg:deb/debian/package-2@2.0.1?package-id=4b756c6f6fb127a3",
"type": "library",
"name": "package-2",
"version": "2.0.1",
@ -100,7 +100,7 @@
]
},
{
"bom-ref":"redacted",
"bom-ref": "os:debian@1.2.3",
"type": "operating-system",
"name": "debian",
"version": "1.2.3",

View File

@ -90,16 +90,24 @@ func TestCycloneDxImageEncoder(t *testing.T) {
// redactor builds the testutil.Redactor for this test file from three pieces: the literal values
// passed in, one capture-group replacement for a component bom-ref, and a set of whole-match
// pattern replacements (dates, sha256 digests, serial numbers).
//
// NOTE(review): this span is rendered in diff form, so the pre-change and post-change comment/pattern
// lines for image hashes and serial numbers appear next to each other below.
func redactor(values ...string) testutil.Redactor {
return testutil.NewRedactions().
WithValuesRedacted(values...).
WithPatternRedactorSpec(
testutil.PatternReplacement{
// only the source component bom-ref (not package or other component bom-refs)
// NOTE(review): the pattern requires type="file"; the image snapshot in this change shows the
// type="container" source component with a concrete bom-ref, i.e. not redacted — confirm intended.
Search: regexp.MustCompile(`<component bom-ref="(?P<redact>[^"]*)" type="file">`),
Groups: []string{"redact"}, // use the regex to anchor the search, but only replace bytes within the capture group
Replace: "redacted",
},
).
WithPatternRedactors(
map[string]string{
// dates
`([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])[Tt]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(\.[0-9]+)?(([Zz])|([+|\-]([01][0-9]|2[0-3]):[0-5][0-9]))`: `redacted`,
// image hashes and BOM refs
// image hashes
`sha256:[A-Za-z0-9]{64}`: `sha256:redacted`,
// serial numbers and BOM refs
`(serialNumber|bom-ref)="[^"]+"`: `$1="redacted"`,
// serial numbers
`(serialNumber)="[^"]+"`: `$1="redacted"`,
},
)
}

View File

@ -16,7 +16,7 @@
</component>
</metadata>
<components>
<component bom-ref="redacted" type="library">
<component bom-ref="4dd25c6ee16b729a" type="library">
<name>package-1</name>
<version>1.0.1</version>
<licenses>
@ -34,7 +34,7 @@
<property name="syft:location:0:path">/some/path/pkg1</property>
</properties>
</component>
<component bom-ref="redacted" type="library">
<component bom-ref="pkg:deb/debian/package-2@2.0.1?package-id=39392bb5e270f669" type="library">
<name>package-2</name>
<version>2.0.1</version>
<cpe>cpe:2.3:*:some:package:2:*:*:*:*:*:*:*</cpe>
@ -47,7 +47,7 @@
<property name="syft:metadata:installedSize">0</property>
</properties>
</component>
<component bom-ref="redacted" type="operating-system">
<component bom-ref="os:debian@1.2.3" type="operating-system">
<name>debian</name>
<version>1.2.3</version>
<description>debian</description>

View File

@ -11,13 +11,13 @@
</component>
</components>
</tools>
<component bom-ref="redacted" type="container">
<component bom-ref="f28a4ba3ddfdddad" type="container">
<name>user-image-input</name>
<version>sha256:redacted</version>
</component>
</metadata>
<components>
<component bom-ref="redacted" type="library">
<component bom-ref="72567175418f73f8" type="library">
<name>package-1</name>
<version>1.0.1</version>
<licenses>
@ -36,7 +36,7 @@
<property name="syft:location:0:path">/somefile-1.txt</property>
</properties>
</component>
<component bom-ref="redacted" type="library">
<component bom-ref="pkg:deb/debian/package-2@2.0.1?package-id=4b756c6f6fb127a3" type="library">
<name>package-2</name>
<version>2.0.1</version>
<cpe>cpe:2.3:*:some:package:2:*:*:*:*:*:*:*</cpe>
@ -50,7 +50,7 @@
<property name="syft:metadata:installedSize">0</property>
</properties>
</component>
<component bom-ref="redacted" type="operating-system">
<component bom-ref="os:debian@1.2.3" type="operating-system">
<name>debian</name>
<version>1.2.3</version>
<description>debian</description>

View File

@ -6,6 +6,7 @@ import (
"github.com/CycloneDX/cyclonedx-go"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/packagemetadata"
"github.com/anchore/syft/syft/pkg"
@ -84,12 +85,13 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package {
}
p := &pkg.Package{
Name: c.Name,
Version: c.Version,
Locations: decodeLocations(values),
Licenses: pkg.NewLicenseSet(decodeLicenses(c)...),
CPEs: decodeCPEs(c),
PURL: c.PackageURL,
Name: c.Name,
Version: c.Version,
Locations: decodeLocations(values),
Licenses: pkg.NewLicenseSet(decodeLicenses(c)...),
CPEs: decodeCPEs(c),
PURL: c.PackageURL,
SupplementalData: []any{artifact.ID(c.BOMRef)},
}
DecodeInto(p, values, "syft:package", CycloneDXFields)

View File

@ -70,7 +70,6 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
if syftID != "" {
idMap[syftID] = p
}
// TODO there must be a better way than needing to call this manually:
p.SetID()
s.Artifacts.Packages.Add(*p)
}

View File

@ -28,6 +28,7 @@ func (r RedactorFn) Redact(b []byte) []byte {
// PatternReplacement is a regex-driven redaction rule; see Redact for how the fields combine.
type PatternReplacement struct {
// Search is the pattern whose matches are redacted.
Search *regexp.Regexp
// Groups optionally names capture groups of Search. When non-empty, only the bytes captured by
// these groups are replaced and the rest of each match is left untouched.
Groups []string
// Replace is the substitute text. With no Groups it is handed to Regexp.ReplaceAll (so "$1"-style
// references are expanded); with Groups it is inserted verbatim in place of each listed group.
Replace string
}
@ -39,7 +40,67 @@ func NewPatternReplacement(r *regexp.Regexp) PatternReplacement {
}
// Redact returns b with the text located by p.Search replaced using p.Replace.
//
// NOTE(review): this span is rendered in diff form — the first return statement is the pre-change
// body and the statements after it are the post-change body. Post-change behavior: when no Groups
// are configured every whole match is replaced via Regexp.ReplaceAll; otherwise only the named
// capture groups are rewritten by redactNamedGroups.
func (p PatternReplacement) Redact(b []byte) []byte {
return p.Search.ReplaceAll(b, []byte(p.Replace))
if len(p.Groups) == 0 {
return p.Search.ReplaceAll(b, []byte(p.Replace))
}
return p.redactNamedGroups(b)
}
// redactNamedGroups returns a copy of b in which, for every match of p.Search, the bytes captured
// by each group named in p.Groups are replaced with the literal p.Replace text. Bytes of a match
// that lie outside those groups, and everything outside the matches, are copied through unchanged.
//
// Capture positions come from a single FindAllSubmatchIndex pass over the whole input, so
// context-sensitive assertions (\b, \B, ^, $) are evaluated against b itself rather than against an
// isolated copy of each match. Groups that did not participate in a match are skipped. Within a
// match the selected captures are applied in positional order, and a capture that starts inside a
// span that was already replaced (e.g. a nested group) is ignored instead of being sliced with
// stale offsets.
func (p PatternReplacement) redactNamedGroups(b []byte) []byte {
	selected := make(map[string]struct{}, len(p.Groups))
	for _, name := range p.Groups {
		selected[name] = struct{}{}
	}

	// resolve which capture indexes to rewrite once, rather than once per match
	var groupIdx []int
	for i, name := range p.Search.SubexpNames() {
		if i == 0 {
			continue // index 0 is the whole match, never a selectable group
		}
		if _, ok := selected[name]; ok {
			groupIdx = append(groupIdx, i)
		}
	}

	out := make([]byte, 0, len(b))
	cursor := 0        // everything in b before cursor has already been emitted or replaced
	var spans [][2]int // scratch: [start, end) of the selected captures of the current match

	for _, loc := range p.Search.FindAllSubmatchIndex(b, -1) {
		spans = spans[:0]
		for _, i := range groupIdx {
			start, end := loc[2*i], loc[2*i+1]
			if start < 0 || end < 0 {
				continue // the group did not participate in this match
			}

			// insert ordered by start offset (stable): capture indexes follow the order of the
			// opening parentheses, which is not necessarily the order of the captured text
			at := len(spans)
			for at > 0 && spans[at-1][0] > start {
				at--
			}
			spans = append(spans, [2]int{})
			copy(spans[at+1:], spans[at:])
			spans[at] = [2]int{start, end}
		}

		for _, s := range spans {
			if s[0] < cursor {
				continue // nested in (or overlapping) a span that was already replaced
			}
			out = append(out, b[cursor:s[0]]...)
			out = append(out, p.Replace...)
			cursor = s[1]
		}
	}

	return append(out, b[cursor:]...)
}
// Replace by value //////////////////////////////
@ -86,6 +147,13 @@ func (r *Redactions) WithPatternRedactors(values map[string]string) *Redactions
return r
}
// WithPatternRedactorSpec registers every given PatternReplacement as a redactor, preserving the
// order in which they were passed, and returns r so calls can be chained.
func (r *Redactions) WithPatternRedactorSpec(values ...PatternReplacement) *Redactions {
	for i := range values {
		r.redactors = append(r.redactors, values[i])
	}
	return r
}
func (r *Redactions) WithValueRedactors(values map[string]string) *Redactions {
for k, v := range values {
r.redactors = append(r.redactors,

View File

@ -3,6 +3,8 @@ package bitnami
import (
"testing"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/artifact"
@ -493,6 +495,28 @@ func TestBitnamiCataloger(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, tt.fixture).
WithCompareOptions(cmpopts.IgnoreFields(pkg.Package{}, "SupplementalData")).
ExpectsAssertion(
func(t *testing.T, pkgs []pkg.Package, relationships []artifact.Relationship) {
	for _, p := range pkgs {
		// every package must carry supplemental data holding an ID, and that ID must match ID()
		assert.NotEmpty(t, p.SupplementalData)

		// take the first non-empty ID, mirroring the lookup order used by Package.SetID.
		// (a bare `break` inside a switch case only exits the switch, so the stop condition
		// has to live outside the switch to actually end the loop)
		var id artifact.ID
		for _, data := range p.SupplementalData {
			switch d := data.(type) {
			case artifact.ID:
				id = d
			case artifact.Identifiable:
				id = d.ID()
			}
			if id != "" {
				break
			}
		}

		assert.NotEmpty(t, id)
		assert.Equal(t, p.ID(), id)
	}
},
).
Expects(tt.wantPkgs, tt.wantRelationships).
WithErrorAssertion(tt.wantErr).
TestCataloger(t, NewCataloger())

View File

@ -17,17 +17,18 @@ import (
// Package represents an application or library that has been bundled into a distributable format.
// TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places?
type Package struct {
id artifact.ID `hash:"ignore"`
Name string // the package name
Version string // the version of the package
FoundBy string `hash:"ignore" cyclonedx:"foundBy"` // the specific cataloger that discovered this package
Locations file.LocationSet // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package)
Licenses LicenseSet // licenses discovered with the package metadata
Language Language `hash:"ignore" cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc)
Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc)
CPEs []cpe.CPE `hash:"ignore"` // all possible Common Platform Enumerations (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields)
PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec)
Metadata interface{} // additional data found while parsing the package source
id artifact.ID `hash:"ignore"` // the package identifier; assigned by SetID
Name string // the package name
Version string // the version of the package
FoundBy string `hash:"ignore" cyclonedx:"foundBy"` // the specific cataloger that discovered this package
Locations file.LocationSet // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package)
Licenses LicenseSet // licenses discovered with the package metadata
Language Language `hash:"ignore" cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc)
Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc)
CPEs []cpe.CPE `hash:"ignore"` // all possible Common Platform Enumerations (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields)
PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec)
Metadata any // additional data found while parsing the package source
SupplementalData []any `hash:"ignore"` // additional data that is not part of the package metadata nor expressed in output formats; SetID adopts the first entry that is a non-empty artifact.ID (or an artifact.Identifiable with a non-empty ID) as the package ID before falling back to hashing
}
func (p *Package) OverrideID(id artifact.ID) {
@ -35,6 +36,24 @@ func (p *Package) OverrideID(id artifact.ID) {
}
func (p *Package) SetID() {
for _, data := range p.SupplementalData {
switch d := data.(type) {
case artifact.ID:
if d == "" {
continue
}
p.id = d
return
case artifact.Identifiable:
id := d.ID()
if id == "" {
continue
}
p.id = id
return
}
}
id, err := artifact.IDByHash(p)
if err != nil {
// TODO: what to do in this case?