Add digest property to parent and nested java package metadata (#941)

This commit is contained in:
Christopher Angelo Phillips 2022-04-08 15:12:32 -04:00 committed by GitHub
parent e415bb21e7
commit 782b2e3348
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 140 additions and 23 deletions

View File

@ -4,12 +4,15 @@ import (
"fmt"
"strings"
syftFile "github.com/anchore/syft/syft/file"
"github.com/CycloneDX/cyclonedx-go"
"github.com/anchore/syft/syft/pkg"
)
//nolint:funlen, gocognit
func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
refs := []cyclonedx.ExternalReference{}
var refs []cyclonedx.ExternalReference
if hasMetadata(p) {
switch metadata := p.Metadata.(type) {
case pkg.ApkMetadata:
@ -46,6 +49,19 @@ func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
Type: cyclonedx.ERTypeWebsite,
})
}
case pkg.JavaMetadata:
if len(metadata.ArchiveDigests) > 0 {
for _, digest := range metadata.ArchiveDigests {
refs = append(refs, cyclonedx.ExternalReference{
URL: "",
Type: cyclonedx.ERTypeBuildMeta,
Hashes: &[]cyclonedx.Hash{{
Algorithm: cyclonedx.HashAlgorithm(digest.Algorithm),
Value: digest.Value,
}},
})
}
}
case pkg.PythonPackageMetadata:
if metadata.DirectURLOrigin != nil && metadata.DirectURLOrigin.URL != "" {
ref := cyclonedx.ExternalReference{
@ -79,6 +95,20 @@ func decodeExternalReferences(c *cyclonedx.Component, metadata interface{}) {
meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
case *pkg.GemMetadata:
meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
case *pkg.JavaMetadata:
var digests []syftFile.Digest
if ref := findExternalRef(c, cyclonedx.ERTypeBuildMeta); ref != nil {
if ref.Hashes != nil {
for _, hash := range *ref.Hashes {
digests = append(digests, syftFile.Digest{
Algorithm: string(hash.Algorithm),
Value: hash.Value,
})
}
}
}
meta.ArchiveDigests = digests
case *pkg.PythonPackageMetadata:
if meta.DirectURLOrigin == nil {
meta.DirectURLOrigin = &pkg.PythonDirectURLOriginInfo{}

View File

@ -22,5 +22,6 @@ func ExternalRefs(p pkg.Package) (externalRefs []ExternalRef) {
ReferenceType: PurlExternalRefType,
})
}
return externalRefs
}

View File

@ -309,6 +309,14 @@ func extractMetadata(p *spdx.Package2_2, info pkgInfo) (pkg.MetadataType, interf
Architecture: arch,
Maintainer: p.PackageOriginatorPerson,
}
case pkg.JavaPkg:
var digests []file.Digest
for algorithm, value := range p.PackageChecksums {
digests = append(digests, file.Digest{Algorithm: string(algorithm), Value: value.Value})
}
return pkg.JavaMetadataType, pkg.JavaMetadata{
ArchiveDigests: digests,
}
}
return pkg.UnknownMetadataType, nil
}

View File

@ -55,14 +55,32 @@ func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []m
for _, p := range catalog.Sorted() {
license := spdxhelpers.License(p)
packageSpdxID := model.ElementID(p.ID()).String()
filesAnalyzed := false
// we generate digests for some Java packages
// see page 33 of the spdx specification for 2.2
// spdx.github.io/spdx-spec/package-information/#710-package-checksum-field
var checksums []model.Checksum
if p.MetadataType == pkg.JavaMetadataType {
javaMetadata := p.Metadata.(pkg.JavaMetadata)
if len(javaMetadata.ArchiveDigests) > 0 {
filesAnalyzed = true
for _, digest := range javaMetadata.ArchiveDigests {
checksums = append(checksums, model.Checksum{
Algorithm: digest.Algorithm,
ChecksumValue: digest.Value,
})
}
}
}
// note: the license concluded and declared should be the same since we are collecting license information
// from the project data itself (the installed package files).
packages = append(packages, model.Package{
Checksums: checksums,
Description: spdxhelpers.Description(p),
DownloadLocation: spdxhelpers.DownloadLocation(p),
ExternalRefs: spdxhelpers.ExternalRefs(p),
FilesAnalyzed: false,
FilesAnalyzed: filesAnalyzed,
HasFiles: fileIDsForPackage(packageSpdxID, relationships),
Homepage: spdxhelpers.Homepage(p),
// The Declared License is what the authors of a project believe govern the package

View File

@ -103,6 +103,24 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// the Comments on License field (section 3.16) is preferred.
license := spdxhelpers.License(p)
filesAnalyzed := false
checksums := make(map[spdx.ChecksumAlgorithm]spdx.Checksum)
// If the pkg type is Java we have attempted to generate a digest
// FilesAnalyzed should be true in this case
if p.MetadataType == pkg.JavaMetadataType {
javaMetadata := p.Metadata.(pkg.JavaMetadata)
if len(javaMetadata.ArchiveDigests) > 0 {
filesAnalyzed = true
for _, digest := range javaMetadata.ArchiveDigests {
checksums[spdx.ChecksumAlgorithm(digest.Algorithm)] = spdx.Checksum{
Algorithm: spdx.ChecksumAlgorithm(digest.Algorithm),
Value: digest.Value,
}
}
}
}
results[spdx.ElementID(id)] = &spdx.Package2_2{
// NOT PART OF SPEC
@ -159,7 +177,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// Intent: A package can refer to a project, product, artifact, distribution or a component that is
// external to the SPDX document.
FilesAnalyzed: false,
FilesAnalyzed: filesAnalyzed,
// NOT PART OF SPEC: did FilesAnalyzed tag appear?
IsFilesAnalyzedTagPresent: true,
@ -180,6 +198,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
// to determine if any file in the original package has been changed. If the SPDX file is to be included
// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
// checksum by default.
PackageChecksums: checksums,
// note: based on the purpose above no discovered checksums should be provided, but instead, only
// tool-derived checksums.

View File

@ -77,30 +77,39 @@ func (i *DigestsCataloger) catalogLocation(resolver source.FileResolver, locatio
}
defer internal.CloseAndLogError(contentReader, location.VirtualPath)
digests, err := DigestsFromFile(contentReader, i.hashes)
if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
}
return digests, nil
}
func DigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]Digest, error) {
// create a set of hasher objects tied together with a single writer to feed content into
hashers := make([]hash.Hash, len(i.hashes))
writers := make([]io.Writer, len(i.hashes))
for idx, hashObj := range i.hashes {
hashers := make([]hash.Hash, len(hashes))
writers := make([]io.Writer, len(hashes))
for idx, hashObj := range hashes {
hashers[idx] = hashObj.New()
writers[idx] = hashers[idx]
}
size, err := io.Copy(io.MultiWriter(writers...), contentReader)
size, err := io.Copy(io.MultiWriter(writers...), closer)
if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
return nil, err
}
if size == 0 {
return make([]Digest, 0), nil
}
result := make([]Digest, len(i.hashes))
result := make([]Digest, len(hashes))
// only capture digests when there is content. It is important to do this based on SIZE and not
// FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
// file type but a body is still allowed.
for idx, hasher := range hashers {
result[idx] = Digest{
Algorithm: DigestAlgorithmName(i.hashes[idx]),
Algorithm: DigestAlgorithmName(hashes[idx]),
Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
}
}

View File

@ -1,14 +1,17 @@
package java
import (
"crypto"
"fmt"
"io"
"os"
"path"
"strings"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
syftFile "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
)
@ -34,6 +37,11 @@ var archiveFormatGlobs = []string{
// project that we can build in CI feel free to include it
}
// javaArchiveHashes are all the current hash algorithms used to calculate archive digests
var javaArchiveHashes = []crypto.Hash{
crypto.SHA1,
}
type archiveParser struct {
fileManifest file.ZipFileManifest
virtualPath string
@ -101,6 +109,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
}
// find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg
// NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml
auxPkgs, err := j.discoverPkgsFromAllMavenFiles(parentPkg)
if err != nil {
return nil, nil, err
@ -135,6 +144,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// search and parse java manifest files
// TODO: do we want to prefer or check for pom files over manifest here?
manifestMatches := j.fileManifest.GlobMatch(manifestGlob)
if len(manifestMatches) > 1 {
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)
@ -157,6 +167,18 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
return nil, nil
}
archiveCloser, err := os.Open(j.archivePath)
if err != nil {
return nil, fmt.Errorf("unable to open archive path (%s): %w", j.archivePath, err)
}
defer archiveCloser.Close()
// grab and assign digest for the entire archive
digests, err := syftFile.DigestsFromFile(archiveCloser, javaArchiveHashes)
if err != nil {
log.Warnf("failed to create digest for file=%q: %+v", j.archivePath, err)
}
return &pkg.Package{
Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo),
@ -166,6 +188,7 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
Metadata: pkg.JavaMetadata{
VirtualPath: j.virtualPath,
Manifest: manifest,
ArchiveDigests: digests,
},
}, nil
}
@ -181,12 +204,14 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
var pkgs []*pkg.Package
properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath)
// pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomPropertiesGlob))
if err != nil {
return nil, err
}
projects, err := pomProjectByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomXMLGlob), j.virtualPath)
// pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomXMLGlob))
if err != nil {
return nil, err
}
@ -273,7 +298,7 @@ func discoverPkgsFromOpener(virtualPath, pathWithinArchive string, archiveOpener
return nestedPkgs, nestedRelationships, nil
}
func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProperties, error) {
func pomPropertiesByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProperties, error) {
contentsOfMavenPropertiesFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)
@ -298,10 +323,11 @@ func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtua
propertiesByParentPath[path.Dir(filePath)] = *pomProperties
}
return propertiesByParentPath, nil
}
func pomProjectByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProject, error) {
func pomProjectByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProject, error) {
contentsOfMavenProjectFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)

View File

@ -283,6 +283,11 @@ func TestParseJar(t *testing.T) {
metadata := a.Metadata.(pkg.JavaMetadata)
metadata.Parent = nil
// redact Digest which is computed differently between CI and local
if len(metadata.ArchiveDigests) > 0 {
metadata.ArchiveDigests = nil
}
// ignore select fields (only works for the main section)
for _, field := range test.ignoreExtras {
if metadata.Manifest != nil && metadata.Manifest.Main != nil {
@ -567,7 +572,6 @@ func TestParseNestedJar(t *testing.T) {
}
}
}
}
})
}

View File

@ -3,6 +3,7 @@ package pkg
import (
"strings"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/internal"
@ -24,6 +25,7 @@ type JavaMetadata struct {
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
ArchiveDigests []file.Digest `hash:"ignore" json:"digest,omitempty"`
PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
}