Add digest property to parent and nested java package metadata (#941)

This commit is contained in:
Christopher Angelo Phillips 2022-04-08 15:12:32 -04:00 committed by GitHub
parent e415bb21e7
commit 782b2e3348
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 140 additions and 23 deletions

View File

@ -4,12 +4,15 @@ import (
"fmt" "fmt"
"strings" "strings"
syftFile "github.com/anchore/syft/syft/file"
"github.com/CycloneDX/cyclonedx-go" "github.com/CycloneDX/cyclonedx-go"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
//nolint:funlen, gocognit
func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference { func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
refs := []cyclonedx.ExternalReference{} var refs []cyclonedx.ExternalReference
if hasMetadata(p) { if hasMetadata(p) {
switch metadata := p.Metadata.(type) { switch metadata := p.Metadata.(type) {
case pkg.ApkMetadata: case pkg.ApkMetadata:
@ -46,6 +49,19 @@ func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
Type: cyclonedx.ERTypeWebsite, Type: cyclonedx.ERTypeWebsite,
}) })
} }
case pkg.JavaMetadata:
if len(metadata.ArchiveDigests) > 0 {
for _, digest := range metadata.ArchiveDigests {
refs = append(refs, cyclonedx.ExternalReference{
URL: "",
Type: cyclonedx.ERTypeBuildMeta,
Hashes: &[]cyclonedx.Hash{{
Algorithm: cyclonedx.HashAlgorithm(digest.Algorithm),
Value: digest.Value,
}},
})
}
}
case pkg.PythonPackageMetadata: case pkg.PythonPackageMetadata:
if metadata.DirectURLOrigin != nil && metadata.DirectURLOrigin.URL != "" { if metadata.DirectURLOrigin != nil && metadata.DirectURLOrigin.URL != "" {
ref := cyclonedx.ExternalReference{ ref := cyclonedx.ExternalReference{
@ -79,6 +95,20 @@ func decodeExternalReferences(c *cyclonedx.Component, metadata interface{}) {
meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite) meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
case *pkg.GemMetadata: case *pkg.GemMetadata:
meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite) meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
case *pkg.JavaMetadata:
var digests []syftFile.Digest
if ref := findExternalRef(c, cyclonedx.ERTypeBuildMeta); ref != nil {
if ref.Hashes != nil {
for _, hash := range *ref.Hashes {
digests = append(digests, syftFile.Digest{
Algorithm: string(hash.Algorithm),
Value: hash.Value,
})
}
}
}
meta.ArchiveDigests = digests
case *pkg.PythonPackageMetadata: case *pkg.PythonPackageMetadata:
if meta.DirectURLOrigin == nil { if meta.DirectURLOrigin == nil {
meta.DirectURLOrigin = &pkg.PythonDirectURLOriginInfo{} meta.DirectURLOrigin = &pkg.PythonDirectURLOriginInfo{}

View File

@ -22,5 +22,6 @@ func ExternalRefs(p pkg.Package) (externalRefs []ExternalRef) {
ReferenceType: PurlExternalRefType, ReferenceType: PurlExternalRefType,
}) })
} }
return externalRefs return externalRefs
} }

View File

@ -309,6 +309,14 @@ func extractMetadata(p *spdx.Package2_2, info pkgInfo) (pkg.MetadataType, interf
Architecture: arch, Architecture: arch,
Maintainer: p.PackageOriginatorPerson, Maintainer: p.PackageOriginatorPerson,
} }
case pkg.JavaPkg:
var digests []file.Digest
for algorithm, value := range p.PackageChecksums {
digests = append(digests, file.Digest{Algorithm: string(algorithm), Value: value.Value})
}
return pkg.JavaMetadataType, pkg.JavaMetadata{
ArchiveDigests: digests,
}
} }
return pkg.UnknownMetadataType, nil return pkg.UnknownMetadataType, nil
} }

View File

@ -55,14 +55,32 @@ func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []m
for _, p := range catalog.Sorted() { for _, p := range catalog.Sorted() {
license := spdxhelpers.License(p) license := spdxhelpers.License(p)
packageSpdxID := model.ElementID(p.ID()).String() packageSpdxID := model.ElementID(p.ID()).String()
filesAnalyzed := false
// we generate digests for some Java packages
// see page 33 of the spdx specification for 2.2
// spdx.github.io/spdx-spec/package-information/#710-package-checksum-field
var checksums []model.Checksum
if p.MetadataType == pkg.JavaMetadataType {
javaMetadata := p.Metadata.(pkg.JavaMetadata)
if len(javaMetadata.ArchiveDigests) > 0 {
filesAnalyzed = true
for _, digest := range javaMetadata.ArchiveDigests {
checksums = append(checksums, model.Checksum{
Algorithm: digest.Algorithm,
ChecksumValue: digest.Value,
})
}
}
}
// note: the license concluded and declared should be the same since we are collecting license information // note: the license concluded and declared should be the same since we are collecting license information
// from the project data itself (the installed package files). // from the project data itself (the installed package files).
packages = append(packages, model.Package{ packages = append(packages, model.Package{
Checksums: checksums,
Description: spdxhelpers.Description(p), Description: spdxhelpers.Description(p),
DownloadLocation: spdxhelpers.DownloadLocation(p), DownloadLocation: spdxhelpers.DownloadLocation(p),
ExternalRefs: spdxhelpers.ExternalRefs(p), ExternalRefs: spdxhelpers.ExternalRefs(p),
FilesAnalyzed: false, FilesAnalyzed: filesAnalyzed,
HasFiles: fileIDsForPackage(packageSpdxID, relationships), HasFiles: fileIDsForPackage(packageSpdxID, relationships),
Homepage: spdxhelpers.Homepage(p), Homepage: spdxhelpers.Homepage(p),
// The Declared License is what the authors of a project believe govern the package // The Declared License is what the authors of a project believe govern the package

View File

@ -103,6 +103,24 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// the Comments on License field (section 3.16) is preferred. // the Comments on License field (section 3.16) is preferred.
license := spdxhelpers.License(p) license := spdxhelpers.License(p)
filesAnalyzed := false
checksums := make(map[spdx.ChecksumAlgorithm]spdx.Checksum)
// If the pkg type is Java we have attempted to generate a digest
// FilesAnalyzed should be true in this case
if p.MetadataType == pkg.JavaMetadataType {
javaMetadata := p.Metadata.(pkg.JavaMetadata)
if len(javaMetadata.ArchiveDigests) > 0 {
filesAnalyzed = true
for _, digest := range javaMetadata.ArchiveDigests {
checksums[spdx.ChecksumAlgorithm(digest.Algorithm)] = spdx.Checksum{
Algorithm: spdx.ChecksumAlgorithm(digest.Algorithm),
Value: digest.Value,
}
}
}
}
results[spdx.ElementID(id)] = &spdx.Package2_2{ results[spdx.ElementID(id)] = &spdx.Package2_2{
// NOT PART OF SPEC // NOT PART OF SPEC
@ -159,7 +177,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// Intent: A package can refer to a project, product, artifact, distribution or a component that is // Intent: A package can refer to a project, product, artifact, distribution or a component that is
// external to the SPDX document. // external to the SPDX document.
FilesAnalyzed: false, FilesAnalyzed: filesAnalyzed,
// NOT PART OF SPEC: did FilesAnalyzed tag appear? // NOT PART OF SPEC: did FilesAnalyzed tag appear?
IsFilesAnalyzedTagPresent: true, IsFilesAnalyzedTagPresent: true,
@ -180,6 +198,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// to determine if any file in the original package has been changed. If the SPDX file is to be included // to determine if any file in the original package has been changed. If the SPDX file is to be included
// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the // in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
// checksum by default. // checksum by default.
PackageChecksums: checksums,
// note: based on the purpose above no discovered checksums should be provided, but instead, only // note: based on the purpose above no discovered checksums should be provided, but instead, only
// tool-derived checksums. // tool-derived checksums.

View File

@ -77,30 +77,39 @@ func (i *DigestsCataloger) catalogLocation(resolver source.FileResolver, locatio
} }
defer internal.CloseAndLogError(contentReader, location.VirtualPath) defer internal.CloseAndLogError(contentReader, location.VirtualPath)
digests, err := DigestsFromFile(contentReader, i.hashes)
if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
}
return digests, nil
}
func DigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]Digest, error) {
// create a set of hasher objects tied together with a single writer to feed content into // create a set of hasher objects tied together with a single writer to feed content into
hashers := make([]hash.Hash, len(i.hashes)) hashers := make([]hash.Hash, len(hashes))
writers := make([]io.Writer, len(i.hashes)) writers := make([]io.Writer, len(hashes))
for idx, hashObj := range i.hashes { for idx, hashObj := range hashes {
hashers[idx] = hashObj.New() hashers[idx] = hashObj.New()
writers[idx] = hashers[idx] writers[idx] = hashers[idx]
} }
size, err := io.Copy(io.MultiWriter(writers...), contentReader) size, err := io.Copy(io.MultiWriter(writers...), closer)
if err != nil { if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err} return nil, err
} }
if size == 0 { if size == 0 {
return make([]Digest, 0), nil return make([]Digest, 0), nil
} }
result := make([]Digest, len(i.hashes)) result := make([]Digest, len(hashes))
// only capture digests when there is content. It is important to do this based on SIZE and not // only capture digests when there is content. It is important to do this based on SIZE and not
// FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only // FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
// file type but a body is still allowed. // file type but a body is still allowed.
for idx, hasher := range hashers { for idx, hasher := range hashers {
result[idx] = Digest{ result[idx] = Digest{
Algorithm: DigestAlgorithmName(i.hashes[idx]), Algorithm: DigestAlgorithmName(hashes[idx]),
Value: fmt.Sprintf("%+x", hasher.Sum(nil)), Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
} }
} }

View File

@ -1,14 +1,17 @@
package java package java
import ( import (
"crypto"
"fmt" "fmt"
"io" "io"
"os"
"path" "path"
"strings" "strings"
"github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
syftFile "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common" "github.com/anchore/syft/syft/pkg/cataloger/common"
) )
@ -34,6 +37,11 @@ var archiveFormatGlobs = []string{
// project that we can build in CI feel free to include it // project that we can build in CI feel free to include it
} }
// javaArchiveHashes are all the current hash algorithms used to calculate archive digests
var javaArchiveHashes = []crypto.Hash{
crypto.SHA1,
}
type archiveParser struct { type archiveParser struct {
fileManifest file.ZipFileManifest fileManifest file.ZipFileManifest
virtualPath string virtualPath string
@ -101,6 +109,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
} }
// find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg // find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg
// NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml
auxPkgs, err := j.discoverPkgsFromAllMavenFiles(parentPkg) auxPkgs, err := j.discoverPkgsFromAllMavenFiles(parentPkg)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
@ -135,6 +144,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages. // discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// search and parse java manifest files // search and parse java manifest files
// TODO: do we want to prefer or check for pom files over manifest here?
manifestMatches := j.fileManifest.GlobMatch(manifestGlob) manifestMatches := j.fileManifest.GlobMatch(manifestGlob)
if len(manifestMatches) > 1 { if len(manifestMatches) > 1 {
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches) return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)
@ -157,6 +167,18 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
return nil, nil return nil, nil
} }
archiveCloser, err := os.Open(j.archivePath)
if err != nil {
return nil, fmt.Errorf("unable to open archive path (%s): %w", j.archivePath, err)
}
defer archiveCloser.Close()
// grab and assign digest for the entire archive
digests, err := syftFile.DigestsFromFile(archiveCloser, javaArchiveHashes)
if err != nil {
log.Warnf("failed to create digest for file=%q: %+v", j.archivePath, err)
}
return &pkg.Package{ return &pkg.Package{
Name: selectName(manifest, j.fileInfo), Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo), Version: selectVersion(manifest, j.fileInfo),
@ -164,8 +186,9 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
Type: j.fileInfo.pkgType(), Type: j.fileInfo.pkgType(),
MetadataType: pkg.JavaMetadataType, MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{ Metadata: pkg.JavaMetadata{
VirtualPath: j.virtualPath, VirtualPath: j.virtualPath,
Manifest: manifest, Manifest: manifest,
ArchiveDigests: digests,
}, },
}, nil }, nil
} }
@ -181,12 +204,14 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
var pkgs []*pkg.Package var pkgs []*pkg.Package
properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath) // pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomPropertiesGlob))
if err != nil { if err != nil {
return nil, err return nil, err
} }
projects, err := pomProjectByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomXMLGlob), j.virtualPath) // pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomXMLGlob))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -273,7 +298,7 @@ func discoverPkgsFromOpener(virtualPath, pathWithinArchive string, archiveOpener
return nestedPkgs, nestedRelationships, nil return nestedPkgs, nestedRelationships, nil
} }
func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProperties, error) { func pomPropertiesByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProperties, error) {
contentsOfMavenPropertiesFiles, err := file.ContentsFromZip(archivePath, extractPaths...) contentsOfMavenPropertiesFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err) return nil, fmt.Errorf("unable to extract maven files: %w", err)
@ -298,10 +323,11 @@ func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtua
propertiesByParentPath[path.Dir(filePath)] = *pomProperties propertiesByParentPath[path.Dir(filePath)] = *pomProperties
} }
return propertiesByParentPath, nil return propertiesByParentPath, nil
} }
func pomProjectByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProject, error) { func pomProjectByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProject, error) {
contentsOfMavenProjectFiles, err := file.ContentsFromZip(archivePath, extractPaths...) contentsOfMavenProjectFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err) return nil, fmt.Errorf("unable to extract maven files: %w", err)

View File

@ -283,6 +283,11 @@ func TestParseJar(t *testing.T) {
metadata := a.Metadata.(pkg.JavaMetadata) metadata := a.Metadata.(pkg.JavaMetadata)
metadata.Parent = nil metadata.Parent = nil
// redact Digest which is computed differently between CI and local
if len(metadata.ArchiveDigests) > 0 {
metadata.ArchiveDigests = nil
}
// ignore select fields (only works for the main section) // ignore select fields (only works for the main section)
for _, field := range test.ignoreExtras { for _, field := range test.ignoreExtras {
if metadata.Manifest != nil && metadata.Manifest.Main != nil { if metadata.Manifest != nil && metadata.Manifest.Main != nil {
@ -567,7 +572,6 @@ func TestParseNestedJar(t *testing.T) {
} }
} }
} }
} }
}) })
} }

View File

@ -3,6 +3,7 @@ package pkg
import ( import (
"strings" "strings"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/internal" "github.com/anchore/syft/internal"
@ -20,12 +21,13 @@ var jenkinsPluginPomPropertiesGroupIDs = []string{
// JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship. // JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship.
type JavaMetadata struct { type JavaMetadata struct {
VirtualPath string `json:"virtualPath" cyclonedx:"virtualPath"` // we need to include the virtual path in cyclonedx documents to prevent deduplication of jars within jars VirtualPath string `json:"virtualPath" cyclonedx:"virtualPath"` // we need to include the virtual path in cyclonedx documents to prevent deduplication of jars within jars
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"` Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"` PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"` PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs ArchiveDigests []file.Digest `hash:"ignore" json:"digest,omitempty"`
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy). PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
} }
// PomProperties represents the fields of interest extracted from a Java archive's pom.properties file. // PomProperties represents the fields of interest extracted from a Java archive's pom.properties file.