Mirror of https://github.com/anchore/syft.git (synced 2026-02-12 02:26:42 +01:00)

commit 782b2e3348
parent e415bb21e7

    Add digest property to parent and nested java package metadata (#941)

@@ -4,12 +4,15 @@ import (
 	"fmt"
 	"strings"
 
+	syftFile "github.com/anchore/syft/syft/file"
+
 	"github.com/CycloneDX/cyclonedx-go"
 	"github.com/anchore/syft/syft/pkg"
 )
 
 //nolint:funlen, gocognit
 func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
-	refs := []cyclonedx.ExternalReference{}
+	var refs []cyclonedx.ExternalReference
 	if hasMetadata(p) {
 		switch metadata := p.Metadata.(type) {
 		case pkg.ApkMetadata:

@@ -46,6 +49,19 @@ func encodeExternalReferences(p pkg.Package) *[]cyclonedx.ExternalReference {
 				Type: cyclonedx.ERTypeWebsite,
 			})
 		}
+	case pkg.JavaMetadata:
+		if len(metadata.ArchiveDigests) > 0 {
+			for _, digest := range metadata.ArchiveDigests {
+				refs = append(refs, cyclonedx.ExternalReference{
+					URL:  "",
+					Type: cyclonedx.ERTypeBuildMeta,
+					Hashes: &[]cyclonedx.Hash{{
+						Algorithm: cyclonedx.HashAlgorithm(digest.Algorithm),
+						Value:     digest.Value,
+					}},
+				})
+			}
+		}
 	case pkg.PythonPackageMetadata:
 		if metadata.DirectURLOrigin != nil && metadata.DirectURLOrigin.URL != "" {
 			ref := cyclonedx.ExternalReference{
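
For illustration, here is a standalone sketch (not part of the commit) of the reference shape the new pkg.JavaMetadata case appends for a jar with a single SHA-1 archive digest; the digest value below is made up.

package main

import (
	"fmt"

	"github.com/CycloneDX/cyclonedx-go"
)

func main() {
	// hypothetical SHA-1 digest of a jar archive; the value is made up
	digestAlgorithm := "sha1"
	digestValue := "1234567890abcdef1234567890abcdef12345678"

	// shape of the external reference produced by the new case above
	ref := cyclonedx.ExternalReference{
		URL:  "", // the commit leaves the URL empty; the hash carries the information
		Type: cyclonedx.ERTypeBuildMeta,
		Hashes: &[]cyclonedx.Hash{{
			Algorithm: cyclonedx.HashAlgorithm(digestAlgorithm),
			Value:     digestValue,
		}},
	}
	fmt.Printf("%+v\n", ref)
}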

@@ -79,6 +95,20 @@ func decodeExternalReferences(c *cyclonedx.Component, metadata interface{}) {
 		meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
 	case *pkg.GemMetadata:
 		meta.Homepage = refURL(c, cyclonedx.ERTypeWebsite)
+	case *pkg.JavaMetadata:
+		var digests []syftFile.Digest
+		if ref := findExternalRef(c, cyclonedx.ERTypeBuildMeta); ref != nil {
+			if ref.Hashes != nil {
+				for _, hash := range *ref.Hashes {
+					digests = append(digests, syftFile.Digest{
+						Algorithm: string(hash.Algorithm),
+						Value:     hash.Value,
+					})
+				}
+			}
+		}
+
+		meta.ArchiveDigests = digests
 	case *pkg.PythonPackageMetadata:
 		if meta.DirectURLOrigin == nil {
 			meta.DirectURLOrigin = &pkg.PythonDirectURLOriginInfo{}

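
The decode side makes the encode step above lossless for digests. A round-trip sketch (an assumption for illustration, not code from the commit; findExternalRef is internal, so the loop is inlined here) of reading the hashes back out of a component:

package main

import (
	"fmt"

	"github.com/CycloneDX/cyclonedx-go"
)

func main() {
	// a component carrying one build-meta reference, as the encode side produces
	component := cyclonedx.Component{
		ExternalReferences: &[]cyclonedx.ExternalReference{{
			Type: cyclonedx.ERTypeBuildMeta,
			Hashes: &[]cyclonedx.Hash{{
				Algorithm: "SHA-1", // hypothetical
				Value:     "1234567890abcdef1234567890abcdef12345678",
			}},
		}},
	}

	// mirror of the decode loop: walk the hashes back into algorithm/value pairs
	for _, ref := range *component.ExternalReferences {
		if ref.Type != cyclonedx.ERTypeBuildMeta || ref.Hashes == nil {
			continue
		}
		for _, hash := range *ref.Hashes {
			fmt.Printf("algorithm=%s value=%s\n", hash.Algorithm, hash.Value)
		}
	}
}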

@@ -22,5 +22,6 @@ func ExternalRefs(p pkg.Package) (externalRefs []ExternalRef) {
 			ReferenceType: PurlExternalRefType,
 		})
 	}
 
 	return externalRefs
 }

@@ -309,6 +309,14 @@ func extractMetadata(p *spdx.Package2_2, info pkgInfo) (pkg.MetadataType, interf
 			Architecture: arch,
 			Maintainer:   p.PackageOriginatorPerson,
 		}
+	case pkg.JavaPkg:
+		var digests []file.Digest
+		for algorithm, value := range p.PackageChecksums {
+			digests = append(digests, file.Digest{Algorithm: string(algorithm), Value: value.Value})
+		}
+		return pkg.JavaMetadataType, pkg.JavaMetadata{
+			ArchiveDigests: digests,
+		}
 	}
 	return pkg.UnknownMetadataType, nil
 }

@@ -55,14 +55,32 @@ func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []m
 	for _, p := range catalog.Sorted() {
 		license := spdxhelpers.License(p)
 		packageSpdxID := model.ElementID(p.ID()).String()
+		filesAnalyzed := false
+
+		// we generate digest for some Java packages
+		// see page 33 of the spdx specification for 2.2
+		// spdx.github.io/spdx-spec/package-information/#710-package-checksum-field
+		var checksums []model.Checksum
+		if p.MetadataType == pkg.JavaMetadataType {
+			javaMetadata := p.Metadata.(pkg.JavaMetadata)
+			if len(javaMetadata.ArchiveDigests) > 0 {
+				filesAnalyzed = true
+				for _, digest := range javaMetadata.ArchiveDigests {
+					checksums = append(checksums, model.Checksum{
+						Algorithm:     digest.Algorithm,
+						ChecksumValue: digest.Value,
+					})
+				}
+			}
+		}
 		// note: the license concluded and declared should be the same since we are collecting license information
 		// from the project data itself (the installed package files).
 		packages = append(packages, model.Package{
+			Checksums:        checksums,
 			Description:      spdxhelpers.Description(p),
 			DownloadLocation: spdxhelpers.DownloadLocation(p),
 			ExternalRefs:     spdxhelpers.ExternalRefs(p),
-			FilesAnalyzed:    false,
+			FilesAnalyzed:    filesAnalyzed,
 			HasFiles:         fileIDsForPackage(packageSpdxID, relationships),
 			Homepage:         spdxhelpers.Homepage(p),
 			// The Declared License is what the authors of a project believe govern the package
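
The package-checksum field referenced in the comment above (SPDX 2.2 §7.10) is just an algorithm/value pair in the JSON output. A self-contained sketch of producing that shape for an archive (the struct mirrors the output shape for illustration and is not syft's internal model; "example.jar" is a hypothetical path):

package main

import (
	"crypto/sha1"
	"encoding/json"
	"fmt"
	"io"
	"os"
)

func main() {
	f, err := os.Open("example.jar") // hypothetical archive path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// hash the whole archive, as syft now does for Java packages
	hasher := sha1.New()
	if _, err := io.Copy(hasher, f); err != nil {
		panic(err)
	}

	// the SPDX 2.2 package checksum shape (illustrative stand-in struct)
	checksum := struct {
		Algorithm     string `json:"algorithm"`
		ChecksumValue string `json:"checksumValue"`
	}{
		Algorithm:     "SHA1",
		ChecksumValue: fmt.Sprintf("%x", hasher.Sum(nil)),
	}

	out, _ := json.Marshal(checksum)
	fmt.Println(string(out)) // e.g. {"algorithm":"SHA1","checksumValue":"..."}
}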

@@ -103,6 +103,24 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
 		// the Comments on License field (section 3.16) is preferred.
 		license := spdxhelpers.License(p)
 
+		filesAnalyzed := false
+		checksums := make(map[spdx.ChecksumAlgorithm]spdx.Checksum)
+
+		// If the pkg type is Java we have attempted to generate a digest;
+		// FilesAnalyzed should be true in this case
+		if p.MetadataType == pkg.JavaMetadataType {
+			javaMetadata := p.Metadata.(pkg.JavaMetadata)
+			if len(javaMetadata.ArchiveDigests) > 0 {
+				filesAnalyzed = true
+				for _, digest := range javaMetadata.ArchiveDigests {
+					checksums[spdx.ChecksumAlgorithm(digest.Algorithm)] = spdx.Checksum{
+						Algorithm: spdx.ChecksumAlgorithm(digest.Algorithm),
+						Value:     digest.Value,
+					}
+				}
+			}
+		}
+
 		results[spdx.ElementID(id)] = &spdx.Package2_2{
 
 			// NOT PART OF SPEC

@@ -159,7 +177,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
 
 			// Intent: A package can refer to a project, product, artifact, distribution or a component that is
 			// external to the SPDX document.
-			FilesAnalyzed: false,
+			FilesAnalyzed: filesAnalyzed,
 			// NOT PART OF SPEC: did FilesAnalyzed tag appear?
 			IsFilesAnalyzedTagPresent: true,

@@ -180,6 +198,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2
 			// to determine if any file in the original package has been changed. If the SPDX file is to be included
 			// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
 			// checksum by default.
+			PackageChecksums: checksums,
 
 			// note: based on the purpose above no discovered checksums should be provided, but instead, only
 			// tool-derived checksums.

@@ -77,30 +77,39 @@ func (i *DigestsCataloger) catalogLocation(resolver source.FileResolver, locatio
 	}
 	defer internal.CloseAndLogError(contentReader, location.VirtualPath)
 
+	digests, err := DigestsFromFile(contentReader, i.hashes)
+	if err != nil {
+		return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
+	}
+
+	return digests, nil
+}
+
+func DigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]Digest, error) {
 	// create a set of hasher objects tied together with a single writer to feed content into
-	hashers := make([]hash.Hash, len(i.hashes))
-	writers := make([]io.Writer, len(i.hashes))
-	for idx, hashObj := range i.hashes {
+	hashers := make([]hash.Hash, len(hashes))
+	writers := make([]io.Writer, len(hashes))
+	for idx, hashObj := range hashes {
 		hashers[idx] = hashObj.New()
 		writers[idx] = hashers[idx]
 	}
 
-	size, err := io.Copy(io.MultiWriter(writers...), contentReader)
+	size, err := io.Copy(io.MultiWriter(writers...), closer)
 	if err != nil {
-		return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
+		return nil, err
 	}
 
 	if size == 0 {
 		return make([]Digest, 0), nil
 	}
 
-	result := make([]Digest, len(i.hashes))
+	result := make([]Digest, len(hashes))
 	// only capture digests when there is content. It is important to do this based on SIZE and not
 	// FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
 	// file type but a body is still allowed.
 	for idx, hasher := range hashers {
 		result[idx] = Digest{
-			Algorithm: DigestAlgorithmName(i.hashes[idx]),
+			Algorithm: DigestAlgorithmName(hashes[idx]),
 			Value:     fmt.Sprintf("%+x", hasher.Sum(nil)),
 		}
 	}
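
This refactor exports the digest logic so the Java cataloger (below) can reuse it. A minimal usage sketch of the new DigestsFromFile, mirroring how discoverMainPackage calls it; "example.jar" is a hypothetical path:

package main

import (
	"crypto"
	_ "crypto/sha1" // link in SHA-1 so crypto.SHA1.New() is usable
	"fmt"
	"os"

	syftFile "github.com/anchore/syft/syft/file"
)

func main() {
	f, err := os.Open("example.jar") // hypothetical archive path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// *os.File satisfies io.ReadCloser, matching the new signature
	digests, err := syftFile.DigestsFromFile(f, []crypto.Hash{crypto.SHA1})
	if err != nil {
		panic(err)
	}
	for _, d := range digests {
		fmt.Printf("%s: %s\n", d.Algorithm, d.Value)
	}
}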

@@ -1,14 +1,17 @@
 package java
 
 import (
+	"crypto"
 	"fmt"
 	"io"
+	"os"
 	"path"
 	"strings"
 
 	"github.com/anchore/syft/internal/file"
 	"github.com/anchore/syft/internal/log"
 	"github.com/anchore/syft/syft/artifact"
+	syftFile "github.com/anchore/syft/syft/file"
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/pkg/cataloger/common"
 )

@@ -34,6 +37,11 @@ var archiveFormatGlobs = []string{
 	// project that we can build in CI feel free to include it
 }
 
+// javaArchiveHashes are all the current hash algorithms used to calculate archive digests
+var javaArchiveHashes = []crypto.Hash{
+	crypto.SHA1,
+}
+
 type archiveParser struct {
 	fileManifest file.ZipFileManifest
 	virtualPath  string
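
Because DigestsFromFile iterates over whatever hashes it is given, this slice is the single knob for which digests get computed. A hypothetical extension (not part of this commit) adding SHA-256 would only mean appending to it:

package java

import (
	"crypto"
	// each algorithm must be linked in for crypto.Hash.New() to work
	_ "crypto/sha1"
	_ "crypto/sha256"
)

// hypothetical variant of javaArchiveHashes with a second algorithm
var javaArchiveHashes = []crypto.Hash{
	crypto.SHA1,
	crypto.SHA256,
}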

@@ -101,6 +109,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
 	}
 
 	// find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg
+	// NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml
 	auxPkgs, err := j.discoverPkgsFromAllMavenFiles(parentPkg)
 	if err != nil {
 		return nil, nil, err

@@ -135,6 +144,7 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
 // discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
 func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
 	// search and parse java manifest files
+	// TODO: do we want to prefer or check for pom files over manifest here?
 	manifestMatches := j.fileManifest.GlobMatch(manifestGlob)
 	if len(manifestMatches) > 1 {
 		return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)

@@ -157,6 +167,18 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
 		return nil, nil
 	}
 
+	archiveCloser, err := os.Open(j.archivePath)
+	if err != nil {
+		return nil, fmt.Errorf("unable to open archive path (%s): %w", j.archivePath, err)
+	}
+	defer archiveCloser.Close()
+
+	// grab and assign digest for the entire archive
+	digests, err := syftFile.DigestsFromFile(archiveCloser, javaArchiveHashes)
+	if err != nil {
+		log.Warnf("failed to create digest for file=%q: %+v", j.archivePath, err)
+	}
+
 	return &pkg.Package{
 		Name:    selectName(manifest, j.fileInfo),
 		Version: selectVersion(manifest, j.fileInfo),

@@ -164,8 +186,9 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
 		Type:         j.fileInfo.pkgType(),
 		MetadataType: pkg.JavaMetadataType,
 		Metadata: pkg.JavaMetadata{
-			VirtualPath: j.virtualPath,
-			Manifest:    manifest,
+			VirtualPath:    j.virtualPath,
+			Manifest:       manifest,
+			ArchiveDigests: digests,
 		},
 	}, nil
 }

@@ -181,12 +204,14 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
 	var pkgs []*pkg.Package
 
-	properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath)
+	// pom.properties
+	properties, err := pomPropertiesByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomPropertiesGlob))
 	if err != nil {
 		return nil, err
 	}
 
-	projects, err := pomProjectByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomXMLGlob), j.virtualPath)
+	// pom.xml
+	projects, err := pomProjectByParentPath(j.archivePath, j.virtualPath, j.fileManifest.GlobMatch(pomXMLGlob))
 	if err != nil {
 		return nil, err
 	}

@@ -273,7 +298,7 @@ func discoverPkgsFromOpener(virtualPath, pathWithinArchive string, archiveOpener
 	return nestedPkgs, nestedRelationships, nil
 }
 
-func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProperties, error) {
+func pomPropertiesByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProperties, error) {
 	contentsOfMavenPropertiesFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
 	if err != nil {
 		return nil, fmt.Errorf("unable to extract maven files: %w", err)

@@ -298,10 +323,11 @@ func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtua
 		propertiesByParentPath[path.Dir(filePath)] = *pomProperties
 	}
 
 	return propertiesByParentPath, nil
 }
 
-func pomProjectByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProject, error) {
+func pomProjectByParentPath(archivePath, virtualPath string, extractPaths []string) (map[string]pkg.PomProject, error) {
 	contentsOfMavenProjectFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
 	if err != nil {
 		return nil, fmt.Errorf("unable to extract maven files: %w", err)

@@ -283,6 +283,11 @@ func TestParseJar(t *testing.T) {
 			metadata := a.Metadata.(pkg.JavaMetadata)
 			metadata.Parent = nil
 
+			// redact Digest which is computed differently between CI and local
+			if len(metadata.ArchiveDigests) > 0 {
+				metadata.ArchiveDigests = nil
+			}
+
 			// ignore select fields (only works for the main section)
 			for _, field := range test.ignoreExtras {
 				if metadata.Manifest != nil && metadata.Manifest.Main != nil {

@@ -567,7 +572,6 @@ func TestParseNestedJar(t *testing.T) {
 				}
 			}
 		}
-
 		})
 	}

@@ -3,6 +3,7 @@ package pkg
 import (
 	"strings"
 
+	"github.com/anchore/syft/syft/file"
 	"github.com/anchore/syft/syft/linux"
 
 	"github.com/anchore/syft/internal"

@@ -20,12 +21,13 @@ var jenkinsPluginPomPropertiesGroupIDs = []string{
 
 // JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship.
 type JavaMetadata struct {
-	VirtualPath   string         `json:"virtualPath" cyclonedx:"virtualPath"` // we need to include the virtual path in cyclonedx documents to prevent deduplication of jars within jars
-	Manifest      *JavaManifest  `mapstructure:"Manifest" json:"manifest,omitempty"`
-	PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
-	PomProject    *PomProject    `mapstructure:"PomProject" json:"pomProject,omitempty"`
-	PURL          string         `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
-	Parent        *Package       `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
+	VirtualPath    string         `json:"virtualPath" cyclonedx:"virtualPath"` // we need to include the virtual path in cyclonedx documents to prevent deduplication of jars within jars
+	Manifest       *JavaManifest  `mapstructure:"Manifest" json:"manifest,omitempty"`
+	PomProperties  *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
+	PomProject     *PomProject    `mapstructure:"PomProject" json:"pomProject,omitempty"`
+	ArchiveDigests []file.Digest  `hash:"ignore" json:"digest,omitempty"`
+	PURL           string         `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
+	Parent         *Package       `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
 }
 
 // PomProperties represents the fields of interest extracted from a Java archive's pom.properties file.
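
Given the json tag on the new field, Java packages gain a "digest" array in syft-json output. A sketch of that shape using stand-in types that mirror the tags above (not syft's actual structs; the path and digest value are hypothetical):

package main

import (
	"encoding/json"
	"fmt"
)

// stand-in for syft's file.Digest, mirroring its json tags
type digest struct {
	Algorithm string `json:"algorithm"`
	Value     string `json:"value"`
}

// stand-in for the relevant slice of pkg.JavaMetadata
type javaMetadata struct {
	VirtualPath    string   `json:"virtualPath"`
	ArchiveDigests []digest `json:"digest,omitempty"`
}

func main() {
	m := javaMetadata{
		VirtualPath: "/example.jar", // hypothetical
		ArchiveDigests: []digest{{
			Algorithm: "sha1",
			Value:     "1234567890abcdef1234567890abcdef12345678", // hypothetical
		}},
	}
	out, _ := json.MarshalIndent(m, "", "  ")
	fmt.Println(string(out))
}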