fix: #953 Derive language from pURL - https://github.com/anchore/syft… (#957)

Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
This commit is contained in:
Jon McEwen 2022-04-26 16:51:24 +01:00 committed by GitHub
parent c270ee2a02
commit 7304bbf8ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 59 additions and 14 deletions

View File

@ -86,6 +86,10 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package {
p.Type = pkg.TypeFromPURL(p.PURL)
}
if p.Language == "" {
p.Language = pkg.LanguageFromPURL(p.PURL)
}
return p
}

View File

@ -191,3 +191,31 @@ func Test_deriveBomRef(t *testing.T) {
})
}
}
// Test_decodeComponent verifies the Language of the package produced by
// decodeComponent. The single case feeds in a component that sets only a
// name, version, type, BOM ref and a maven package URL, and expects the
// decoded package to report pkg.Java.
func Test_decodeComponent(t *testing.T) {
	// The same pURL string is used for both the PackageURL and the BOMRef.
	const logbackPURL = "pkg:maven/ch.qos.logback/logback-classic@1.2.3"

	type testCase struct {
		name      string
		component cyclonedx.Component
		want      pkg.Language
	}

	cases := []testCase{
		{
			name: "derive language from pURL if missing",
			component: cyclonedx.Component{
				Name:       "ch.qos.logback/logback-classic",
				Version:    "1.2.3",
				PackageURL: logbackPURL,
				Type:       "library",
				BOMRef:     logbackPURL,
			},
			want: pkg.Java,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			decoded := decodeComponent(&tc.component)
			assert.Equal(t, tc.want, decoded.Language)
		})
	}
}

View File

@ -70,6 +70,13 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ..
// generate PURL (note: this is excluded from package ID, so is safe to mutate)
p.PURL = pkg.URL(p, release)
// if we were not able to identify the language, we have an opportunity
// to try and get this value from the PURL. Worst case, we assert that
// we could not identify the language at either stage and set UnknownLanguage
if p.Language == "" {
p.Language = pkg.LanguageFromPURL(p.PURL)
}
// create file-to-package relationships for files owned by the package
owningRelationships, err := packageFileOwnershipRelationships(p, resolver)
if err != nil {

View File

@ -11,7 +11,7 @@ type Language string
const (
// the full set of supported programming languages
UnknownLanguage Language = "UnknownLanguage"
UnknownLanguage Language = ""
Java Language = "java"
JavaScript Language = "javascript"
Python Language = "python"

View File

@ -7,6 +7,7 @@ import (
"github.com/anchore/syft/internal/formats/cyclonedxxml"
"github.com/anchore/syft/internal/formats/syftjson"
"github.com/anchore/syft/syft/source"
"github.com/google/go-cmp/cmp"
"regexp"
"testing"
@ -21,12 +22,14 @@ import (
)
// TestEncodeDecodeEncodeCycleComparison is testing for differences in how SBOM documents get encoded on multiple cycles.
// By encding and decoding the sbom we can compare the differences between the set of resulting objects. However,
// By encoding and decoding the sbom we can compare the differences between the set of resulting objects. However,
// this requires specific comparisons being done, and select redactions/omissions being made. Additionally, there are
// already unit tests on each format encoder-decoder for properly functioning comparisons in depth, so there is no need
// to do an object-to-object comparison. For this reason this test focuses on a bytes-to-bytes comparison after an
// encode-decode-encode loop which will detect lossy behavior in both directions.
func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
// use second image for relationships
images := []string{"image-pkg-coverage", "image-owning-package"}
tests := []struct {
formatOption sbom.FormatID
redactor func(in []byte) []byte
@ -34,7 +37,11 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
}{
{
formatOption: syftjson.ID,
json: true,
redactor: func(in []byte) []byte {
in = regexp.MustCompile("\"(id|parent)\": \"[^\"]+\",").ReplaceAll(in, []byte{})
return in
},
json: true,
},
{
formatOption: cyclonedxjson.ID,
@ -55,9 +62,8 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
}
for _, test := range tests {
// use second image for relationships
for _, image := range []string{"image-pkg-coverage", "image-owning-package"} {
t.Run(fmt.Sprintf("%s/%s", test.formatOption, image), func(t *testing.T) {
t.Run(fmt.Sprintf("%s", test.formatOption), func(t *testing.T) {
for _, image := range images {
originalSBOM, _ := catalogFixtureImage(t, image, source.SquashedScope)
format := syft.FormatByID(test.formatOption)
@ -81,15 +87,15 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
if test.json {
s1 := string(by1)
s2 := string(by2)
assert.JSONEq(t, s1, s2)
} else {
if !assert.True(t, bytes.Equal(by1, by2)) {
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(string(by1), string(by2), true)
t.Errorf("diff: %s", dmp.DiffPrettyText(diffs))
if diff := cmp.Diff(s1, s2); diff != "" {
t.Errorf("Encode/Decode mismatch (-want +got):\n%s", diff)
}
} else if !assert.True(t, bytes.Equal(by1, by2)) {
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(string(by1), string(by2), true)
t.Errorf("diff: %s", dmp.DiffPrettyText(diffs))
}
})
}
}
})
}
}