mirror of
https://github.com/anchore/syft.git
synced 2025-11-18 00:43:20 +01:00
Refactor Java archive parsing logic
Signed-off-by: Dan Luhring <dan.luhring@anchore.com>
This commit is contained in:
parent
65e4e17590
commit
fa7fd718cb
@ -7,7 +7,6 @@ import (
|
|||||||
|
|
||||||
"github.com/anchore/syft/internal/log"
|
"github.com/anchore/syft/internal/log"
|
||||||
|
|
||||||
"github.com/anchore/syft/internal"
|
|
||||||
"github.com/anchore/syft/internal/file"
|
"github.com/anchore/syft/internal/file"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/common"
|
"github.com/anchore/syft/syft/pkg/cataloger/common"
|
||||||
@ -25,13 +24,12 @@ var archiveFormatGlobs = []string{
|
|||||||
}
|
}
|
||||||
|
|
||||||
type archiveParser struct {
|
type archiveParser struct {
|
||||||
discoveredPkgs internal.StringSet
|
fileManifest file.ZipFileManifest
|
||||||
fileManifest file.ZipFileManifest
|
virtualPath string
|
||||||
virtualPath string
|
archivePath string
|
||||||
archivePath string
|
contentPath string
|
||||||
contentPath string
|
fileInfo archiveFilename
|
||||||
fileInfo archiveFilename
|
detectNested bool
|
||||||
detectNested bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
|
// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
|
||||||
@ -71,13 +69,12 @@ func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested boo
|
|||||||
currentFilepath := virtualElements[len(virtualElements)-1]
|
currentFilepath := virtualElements[len(virtualElements)-1]
|
||||||
|
|
||||||
return &archiveParser{
|
return &archiveParser{
|
||||||
discoveredPkgs: internal.NewStringSet(),
|
fileManifest: fileManifest,
|
||||||
fileManifest: fileManifest,
|
virtualPath: virtualPath,
|
||||||
virtualPath: virtualPath,
|
archivePath: archivePath,
|
||||||
archivePath: archivePath,
|
contentPath: contentPath,
|
||||||
contentPath: contentPath,
|
fileInfo: newJavaArchiveFilename(currentFilepath),
|
||||||
fileInfo: newJavaArchiveFilename(currentFilepath),
|
detectNested: detectNested,
|
||||||
detectNested: detectNested,
|
|
||||||
}, cleanupFn, nil
|
}, cleanupFn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,25 +88,21 @@ func (j *archiveParser) parse() ([]pkg.Package, error) {
|
|||||||
return nil, fmt.Errorf("could not generate package from %s: %w", j.virtualPath, err)
|
return nil, fmt.Errorf("could not generate package from %s: %w", j.virtualPath, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// don't add the parent package yet, we still may discover aux info to add to the metadata (but still track it as added to prevent duplicates)
|
// find aux packages from pom.properties and potentially modify the existing parentPkg
|
||||||
parentKey := uniquePkgKey(parentPkg)
|
|
||||||
if parentKey != "" {
|
|
||||||
j.discoveredPkgs.Add(parentKey)
|
|
||||||
}
|
|
||||||
|
|
||||||
// find aux packages from pom.properties
|
|
||||||
auxPkgs, err := j.discoverPkgsFromAllPomProperties(parentPkg)
|
auxPkgs, err := j.discoverPkgsFromAllPomProperties(parentPkg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
pkgs = append(pkgs, auxPkgs...)
|
pkgs = append(pkgs, auxPkgs...)
|
||||||
|
|
||||||
// find nested java archive packages
|
if j.detectNested {
|
||||||
nestedPkgs, err := j.discoverPkgsFromNestedArchives(parentPkg)
|
// find nested java archive packages
|
||||||
if err != nil {
|
nestedPkgs, err := j.discoverPkgsFromNestedArchives(parentPkg)
|
||||||
return nil, err
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
pkgs = append(pkgs, nestedPkgs...)
|
||||||
}
|
}
|
||||||
pkgs = append(pkgs, nestedPkgs...)
|
|
||||||
|
|
||||||
// lastly, add the parent package to the list (assuming the parent exists)
|
// lastly, add the parent package to the list (assuming the parent exists)
|
||||||
if parentPkg != nil {
|
if parentPkg != nil {
|
||||||
@ -159,69 +152,71 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// discoverPkgsFromAllPomProperties parses Maven POM properties for a given parent package, returning all listed Java packages found for each pom properties discovered.
|
// discoverPkgsFromAllPomProperties parses Maven POM properties for a given
|
||||||
|
// parent package, returning all listed Java packages found for each pom
|
||||||
|
// properties discovered and potentially updating the given parentPkg with new
|
||||||
|
// data.
|
||||||
func (j *archiveParser) discoverPkgsFromAllPomProperties(parentPkg *pkg.Package) ([]pkg.Package, error) {
|
func (j *archiveParser) discoverPkgsFromAllPomProperties(parentPkg *pkg.Package) ([]pkg.Package, error) {
|
||||||
var pkgs = make([]pkg.Package, 0)
|
if parentPkg == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var pkgs []pkg.Package
|
||||||
|
|
||||||
// search and parse pom.properties files & fetch the contents
|
// search and parse pom.properties files & fetch the contents
|
||||||
contents, err := file.ContentsFromZip(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob)...)
|
contentsOfPomPropertiesFiles, err := file.ContentsFromZip(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob)...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to extract pom.properties: %w", err)
|
return nil, fmt.Errorf("unable to extract pom.properties: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse the manifest file into a rich object
|
for filePath, fileContents := range contentsOfPomPropertiesFiles {
|
||||||
for propsPath, propsContents := range contents {
|
// parse the pom properties file into a rich object
|
||||||
propsObj, err := parsePomProperties(propsPath, strings.NewReader(propsContents))
|
pomProperties, err := parsePomProperties(filePath, strings.NewReader(fileContents))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("failed to parse pom.properties (%s): %+v", j.virtualPath, err)
|
log.Warnf("failed to parse pom.properties (%s): %+v", j.virtualPath, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if propsObj == nil {
|
if pomProperties == nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if propsObj.Version == "" || propsObj.ArtifactID == "" {
|
if pomProperties.Version == "" || pomProperties.ArtifactID == "" {
|
||||||
// TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package
|
// TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if parentPkg == nil {
|
pkgs = append(pkgs, j.packagesFromPomProperties(pomProperties, parentPkg)...)
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
pkgs = append(pkgs, j.packagesFromPomProperties(propsObj, parentPkg)...)
|
|
||||||
}
|
}
|
||||||
return pkgs, nil
|
return pkgs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// packagesFromPomProperties processes a single Maven POM properties for a given parent package, returning all listed Java packages found and
|
// packagesFromPomProperties processes a single Maven POM properties for a given parent package, returning all listed Java packages found and
|
||||||
// associating each discovered package to the given parent package.
|
// associating each discovered package to the given parent package.
|
||||||
func (j *archiveParser) packagesFromPomProperties(propsObj *pkg.PomProperties, parentPkg *pkg.Package) (pkgs []pkg.Package) {
|
func (j *archiveParser) packagesFromPomProperties(pomProperties *pkg.PomProperties, parentPkg *pkg.Package) []pkg.Package {
|
||||||
parentKey := uniquePkgKey(parentPkg)
|
|
||||||
|
|
||||||
// keep the artifact name within the virtual path if this package does not match the parent package
|
// keep the artifact name within the virtual path if this package does not match the parent package
|
||||||
vPathSuffix := ""
|
vPathSuffix := ""
|
||||||
if !strings.HasPrefix(propsObj.ArtifactID, parentPkg.Name) {
|
if !strings.HasPrefix(pomProperties.ArtifactID, parentPkg.Name) {
|
||||||
vPathSuffix += ":" + propsObj.ArtifactID
|
vPathSuffix += ":" + pomProperties.ArtifactID
|
||||||
}
|
}
|
||||||
virtualPath := j.virtualPath + vPathSuffix
|
virtualPath := j.virtualPath + vPathSuffix
|
||||||
|
|
||||||
// discovered props = new package
|
// discovered props = new package
|
||||||
p := pkg.Package{
|
p := pkg.Package{
|
||||||
Name: propsObj.ArtifactID,
|
Name: pomProperties.ArtifactID,
|
||||||
Version: propsObj.Version,
|
Version: pomProperties.Version,
|
||||||
Language: pkg.Java,
|
Language: pkg.Java,
|
||||||
Type: pkg.JavaPkg,
|
Type: pkg.JavaPkg,
|
||||||
MetadataType: pkg.JavaMetadataType,
|
MetadataType: pkg.JavaMetadataType,
|
||||||
Metadata: pkg.JavaMetadata{
|
Metadata: pkg.JavaMetadata{
|
||||||
VirtualPath: virtualPath,
|
VirtualPath: virtualPath,
|
||||||
PomProperties: propsObj,
|
PomProperties: pomProperties,
|
||||||
Parent: parentPkg,
|
Parent: parentPkg,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
pkgKey := uniquePkgKey(&p)
|
pkgKey := uniquePkgKey(&p)
|
||||||
|
parentKey := uniquePkgKey(parentPkg)
|
||||||
|
|
||||||
// the name/version pair matches...
|
// the name/version pair matches...
|
||||||
matchesParentPkg := pkgKey == parentKey
|
matchesParentPkg := pkgKey == parentKey
|
||||||
@ -230,33 +225,32 @@ func (j *archiveParser) packagesFromPomProperties(propsObj *pkg.PomProperties, p
|
|||||||
matchesParentPkg = matchesParentPkg || parentPkg.Metadata.(pkg.JavaMetadata).VirtualPath == virtualPath
|
matchesParentPkg = matchesParentPkg || parentPkg.Metadata.(pkg.JavaMetadata).VirtualPath == virtualPath
|
||||||
|
|
||||||
// the pom artifactId has the parent name or vice versa
|
// the pom artifactId has the parent name or vice versa
|
||||||
if propsObj.ArtifactID != "" {
|
if pomProperties.ArtifactID != "" {
|
||||||
matchesParentPkg = matchesParentPkg || strings.Contains(parentPkg.Name, propsObj.ArtifactID) || strings.Contains(propsObj.ArtifactID, parentPkg.Name)
|
matchesParentPkg = matchesParentPkg || strings.Contains(parentPkg.Name, pomProperties.ArtifactID) || strings.Contains(pomProperties.ArtifactID, parentPkg.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
if matchesParentPkg {
|
if !matchesParentPkg {
|
||||||
// we've run across more information about our parent package, add this info to the parent package metadata
|
|
||||||
// the pom properties is typically a better source of information for name and version than the manifest
|
|
||||||
if parentPkg.Name == "" {
|
|
||||||
parentPkg.Name = p.Name
|
|
||||||
}
|
|
||||||
if parentPkg.Version == "" {
|
|
||||||
parentPkg.Version = p.Version
|
|
||||||
}
|
|
||||||
|
|
||||||
// keep the pom properties, but don't overwrite existing pom properties
|
|
||||||
parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata)
|
|
||||||
if ok && parentMetadata.PomProperties == nil {
|
|
||||||
parentMetadata.PomProperties = propsObj
|
|
||||||
parentPkg.Metadata = parentMetadata
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !matchesParentPkg && !j.discoveredPkgs.Contains(pkgKey) {
|
|
||||||
// only keep packages we haven't seen yet (and are not related to the parent package)
|
// only keep packages we haven't seen yet (and are not related to the parent package)
|
||||||
pkgs = append(pkgs, p)
|
return []pkg.Package{p}
|
||||||
}
|
}
|
||||||
return pkgs
|
|
||||||
|
// we've run across more information about our parent package, add this info to the parent package metadata
|
||||||
|
// the pom properties is typically a better source of information for name and version than the manifest
|
||||||
|
if parentPkg.Name == "" {
|
||||||
|
parentPkg.Name = p.Name
|
||||||
|
}
|
||||||
|
if parentPkg.Version == "" {
|
||||||
|
parentPkg.Version = p.Version
|
||||||
|
}
|
||||||
|
|
||||||
|
// keep the pom properties, but don't overwrite existing pom properties
|
||||||
|
parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata)
|
||||||
|
if ok && parentMetadata.PomProperties == nil {
|
||||||
|
parentMetadata.PomProperties = pomProperties
|
||||||
|
parentPkg.Metadata = parentMetadata
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// discoverPkgsFromNestedArchives finds Java archives within Java archives, returning all listed Java packages found and
|
// discoverPkgsFromNestedArchives finds Java archives within Java archives, returning all listed Java packages found and
|
||||||
@ -264,10 +258,6 @@ func (j *archiveParser) packagesFromPomProperties(propsObj *pkg.PomProperties, p
|
|||||||
func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]pkg.Package, error) {
|
func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]pkg.Package, error) {
|
||||||
var pkgs = make([]pkg.Package, 0)
|
var pkgs = make([]pkg.Package, 0)
|
||||||
|
|
||||||
if !j.detectNested {
|
|
||||||
return pkgs, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// search and parse pom.properties files & fetch the contents
|
// search and parse pom.properties files & fetch the contents
|
||||||
openers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...)
|
openers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user