diff --git a/syft/source/directorysource/alias/detector.go b/syft/source/directorysource/alias/detector.go
new file mode 100644
index 000000000..66b5c77a9
--- /dev/null
+++ b/syft/source/directorysource/alias/detector.go
@@ -0,0 +1,16 @@
+package alias
+
+import "github.com/anchore/syft/syft/source"
+
+// Identifier is used by certain sources (directory, file) to attempt to identify the name and version of a scan target.
+// An Identifier returns nil when it cannot determine an alias for the given source.
+type Identifier func(src source.Source) *source.Alias
+
+// DefaultIdentifiers returns the built-in identifiers in the order they should be tried:
+// package.json first, then pom.xml.
+func DefaultIdentifiers() []Identifier {
+	return []Identifier{
+		NPMPackageAliasIdentifier,
+		MavenProjectDirIdentifier,
+	}
+}
diff --git a/syft/source/directorysource/alias/maven_pom_xml.go b/syft/source/directorysource/alias/maven_pom_xml.go
new file mode 100644
index 000000000..eb115d145
--- /dev/null
+++ b/syft/source/directorysource/alias/maven_pom_xml.go
@@ -0,0 +1,82 @@
+package alias
+
+import (
+	"encoding/xml"
+
+	"github.com/anchore/syft/internal"
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/syft/source"
+)
+
+// MavenProjectDirIdentifier augments name and version with what's found in a root pom.xml.
+// The name is taken from <name>, falling back to <artifactId> since <name> is optional in a POM;
+// the version is taken from <version>, falling back to the <parent> version.
+// It returns nil when there is not exactly one pom.xml, or when it cannot be read or decoded.
+func MavenProjectDirIdentifier(src source.Source) *source.Alias {
+	type pomXML struct {
+		Parent     *pomXML `xml:"parent"`
+		ArtifactID string  `xml:"artifactId"`
+		Name       string  `xml:"name"`
+		Version    string  `xml:"version"`
+	}
+
+	// it's possible older layers would have a pom.xml that gets removed,
+	// but we can probably skip identifying a directory as those
+	r, err := src.FileResolver(source.SquashedScope)
+	if err != nil {
+		log.Debugf("error getting file resolver: %v", err)
+		return nil
+	}
+
+	locs, err := r.FilesByPath("pom.xml")
+	if err != nil {
+		log.Debugf("error getting pom.xml: %v", err)
+		return nil
+	}
+
+	// if we don't have exactly 1 pom.xml in the root directory, we can't guess which is the right one to use
+	if len(locs) == 0 {
+		// expected, not found
+		return nil
+	}
+	if len(locs) > 1 {
+		log.Debugf("multiple pom.xml files found: %v", locs)
+		return nil
+	}
+
+	contents, err := r.FileContentsByLocation(locs[0])
+	if err != nil {
+		log.Tracef("error getting pom.xml contents: %v", err)
+		return nil
+	}
+	defer internal.CloseAndLogError(contents, locs[0].RealPath)
+
+	dec := xml.NewDecoder(contents)
+	project := pomXML{}
+	err = dec.Decode(&project)
+	if err != nil {
+		log.Tracef("error decoding pom.xml contents: %v", err)
+		return nil
+	}
+
+	// a project without its own <version> falls back to the one declared in <parent>
+	parent := pomXML{}
+	if project.Parent != nil {
+		parent = *project.Parent
+	}
+
+	return &source.Alias{
+		Name:    nonEmpty(project.Name, project.ArtifactID),
+		Version: nonEmpty(project.Version, parent.Version),
+	}
+}
+
+// nonEmpty returns the first non-empty string provided, or "" if all are empty
+func nonEmpty(values ...string) string {
+	for _, v := range values {
+		if v != "" {
+			return v
+		}
+	}
+	return ""
+}
diff --git a/syft/source/directorysource/alias/package_json.go b/syft/source/directorysource/alias/package_json.go
new file mode 100644
index 000000000..ab27295b6
--- /dev/null
+++ b/syft/source/directorysource/alias/package_json.go
@@ -0,0 +1,60 @@
+package alias
+
+import (
+	"encoding/json"
+
+	"github.com/anchore/syft/internal"
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/syft/source"
+)
+
+// NPMPackageAliasIdentifier augments name and version with what's found in a root package.json.
+// It returns nil when there is not exactly one package.json, or when it cannot be read or decoded.
+func NPMPackageAliasIdentifier(src source.Source) *source.Alias {
+	type js struct {
+		Name    string `json:"name"`
+		Version string `json:"version"`
+	}
+
+	// it's possible older layers would have a package.json that gets removed,
+	// but we can probably skip identifying a directory as those
+	r, err := src.FileResolver(source.SquashedScope)
+	if err != nil {
+		log.Debugf("error getting file resolver: %v", err)
+		return nil
+	}
+	locs, err := r.FilesByPath("package.json")
+	if err != nil {
+		log.Debugf("error getting package.json: %v", err)
+		return nil
+	}
+	// if we don't have exactly 1 package.json in the root directory, we can't guess which is the right one to use
+	if len(locs) == 0 {
+		// expected, not found
+		return nil
+	}
+	if len(locs) > 1 {
+		log.Debugf("multiple package.json files found: %v", locs)
+		return nil
+	}
+
+	contents, err := r.FileContentsByLocation(locs[0])
+	if err != nil {
+		log.Tracef("error getting package.json contents: %v", err)
+		return nil
+	}
+	defer internal.CloseAndLogError(contents, locs[0].RealPath)
+
+	dec := json.NewDecoder(contents)
+	project := js{}
+	err = dec.Decode(&project)
+	if err != nil {
+		log.Tracef("error decoding package.json contents: %v", err)
+		return nil
+	}
+
+	return &source.Alias{
+		Name:    project.Name,
+		Version: project.Version,
+	}
+}
diff --git a/syft/source/directorysource/directory_source.go b/syft/source/directorysource/directory_source.go
index 2a4ab3705..69d76811b 100644
--- a/syft/source/directorysource/directory_source.go
+++ b/syft/source/directorysource/directory_source.go
@@ -15,16 +15,18 @@ import (
 	"github.com/anchore/syft/syft/file"
 	"github.com/anchore/syft/syft/internal/fileresolver"
 	"github.com/anchore/syft/syft/source"
+	"github.com/anchore/syft/syft/source/directorysource/alias"
 	"github.com/anchore/syft/syft/source/internal"
 )
 
 var _ source.Source = (*directorySource)(nil)
 
 type Config struct {
-	Path    string
-	Base    string
-	Exclude source.ExcludeConfig
-	Alias   source.Alias
+	Path        string
+	Base        string
+	Exclude     source.ExcludeConfig
+	Alias       source.Alias
+	Identifiers []alias.Identifier
 }
 
 type directorySource struct {
@@ -51,11 +53,28 @@ func New(cfg Config) (source.Source, error) {
 		return nil, fmt.Errorf("given path is not a directory: %q", cfg.Path)
 	}
 
-	return &directorySource{
-		id:     deriveIDFromDirectory(cfg),
+	src := &directorySource{
 		config: cfg,
 		mutex:  &sync.Mutex{},
-	}, nil
+	}
+
+	// an explicitly configured alias always takes precedence; only attempt detection when none was given
+	if src.config.Alias.IsEmpty() {
+		for _, identifier := range cfg.Identifiers {
+			// identifiers return nil when they cannot identify the target
+			id := identifier(src)
+			if id == nil || id.IsEmpty() {
+				continue
+			}
+			src.config.Alias = *id
+			break
+		}
+	}
+
+	// derive the ID last so it is computed from the final config, including any detected alias
+	src.id = deriveIDFromDirectory(src.config)
+
+	return src, nil
 }
 
 // deriveIDFromDirectory generates an artifact ID from the given directory config. If an alias is provided, then
diff --git a/syft/source/directorysource/directory_source_provider.go b/syft/source/directorysource/directory_source_provider.go
index 11eed73d8..5d86b9b70 100644
--- a/syft/source/directorysource/directory_source_provider.go
+++ b/syft/source/directorysource/directory_source_provider.go
@@ -8,6 +8,7 @@ import (
 	"github.com/spf13/afero"
 
 	"github.com/anchore/syft/syft/source"
+	"github.com/anchore/syft/syft/source/directorysource/alias"
 )
 
 func NewSourceProvider(path string, exclude source.ExcludeConfig, alias source.Alias, basePath string) source.Provider {
@@ -48,10 +49,11 @@ func (l directorySourceProvider) Provide(_ context.Context) (source.Source, erro
 
 	return New(
 		Config{
-			Path:    location,
-			Base:    basePath(l.basePath, location),
-			Exclude: l.exclude,
-			Alias:   l.alias,
+			Path:        location,
+			Base:        basePath(l.basePath, location),
+			Exclude:     l.exclude,
+			Alias:       l.alias,
+			Identifiers: alias.DefaultIdentifiers(),
 		},
 	)
 }