diff --git a/internal/mimetype_helper.go b/internal/mimetype_helper.go
index 3dbc36098..8ddf29fb3 100644
--- a/internal/mimetype_helper.go
+++ b/internal/mimetype_helper.go
@@ -59,6 +59,7 @@ var (
 			"application/x-elf",
 			"application/x-sharedlib",
 			"application/vnd.microsoft.portable-executable",
+			"application/x-executable",
 		}...,
 	)
 )
diff --git a/syft/file/classification_cataloger_test.go b/syft/file/classification_cataloger_test.go
index 90cca7642..405166225 100644
--- a/syft/file/classification_cataloger_test.go
+++ b/syft/file/classification_cataloger_test.go
@@ -93,7 +93,8 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
 			location: "[", // note: busybox is a link to [
 			expected: []Classification{
 				{
-					Class: "busybox-binary",
+					Class:       "busybox-binary",
+					VirtualPath: "busybox",
 					Metadata: map[string]string{
 						"version": "3.33.3",
 					},
@@ -148,7 +149,8 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T
 			location: "/bin/[",
 			expected: []Classification{
 				{
-					Class: "busybox-binary",
+					Class:       "busybox-binary",
+					VirtualPath: "/bin/busybox",
 					Metadata: map[string]string{
 						"version": "1.35.0",
 					},
diff --git a/syft/file/classifier.go b/syft/file/classifier.go
index 9b35b5110..e71a5fa02 100644
--- a/syft/file/classifier.go
+++ b/syft/file/classifier.go
@@ -40,6 +40,16 @@ var DefaultClassifiers = []Classifier{
 			`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
 		},
 	},
+	{
+		Class: "nodejs-binary",
+		FilepathPatterns: []*regexp.Regexp{
+			regexp.MustCompile(`(.*/|^)node$`),
+		},
+		EvidencePatternTemplates: []string{
+			// regex that matches node.js/vx.y.z
+			`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
+		},
+	},
 	{
 		Class: "go-binary-hint",
 		FilepathPatterns: []*regexp.Regexp{
@@ -67,12 +77,13 @@ type Classifier struct {
 }
 
 type Classification struct {
-	Class    string            `json:"class"`
-	Metadata map[string]string `json:"metadata"`
+	Class       string            `json:"class"`
+	VirtualPath string            `json:"virtual_path"`
+	Metadata    map[string]string `json:"metadata"`
 }
 
 func (c Classifier) Classify(resolver source.FileResolver, location source.Location) (*Classification, error) {
-	doesFilepathMatch, filepathNamedGroupValues := filepathMatches(c.FilepathPatterns, location)
+	doesFilepathMatch, filepathNamedGroupValues := FilepathMatches(c.FilepathPatterns, location)
 	if !doesFilepathMatch {
 		return nil, nil
 	}
@@ -114,8 +125,9 @@ func (c Classifier) Classify(resolver source.FileResolver, location source.Locat
 		matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents))
 		if result == nil {
 			result = &Classification{
-				Class:    c.Class,
-				Metadata: matchMetadata,
+				Class:       c.Class,
+				VirtualPath: location.VirtualPath,
+				Metadata:    matchMetadata,
 			}
 		} else {
 			for key, value := range matchMetadata {
@@ -126,7 +138,7 @@ func (c Classifier) Classify(resolver source.FileResolver, location source.Locat
 	return result, nil
 }
 
-func filepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
+func FilepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
 	for _, path := range []string{location.RealPath, location.VirtualPath} {
 		if path == "" {
 			continue
diff --git a/syft/file/classifier_test.go b/syft/file/classifier_test.go
index ddfa50a2b..4421bc349 100644
--- a/syft/file/classifier_test.go
+++ b/syft/file/classifier_test.go
@@ -89,7 +89,7 @@ func TestFilepathMatches(t *testing.T) {
 			for _, p := range test.patterns {
 				patterns = append(patterns, regexp.MustCompile(p))
 			}
-			actualMatches, actualNamedGroups := filepathMatches(patterns, test.location)
+			actualMatches, actualNamedGroups := FilepathMatches(patterns, test.location)
 			assert.Equal(t, test.expectedMatches, actualMatches)
 			assert.Equal(t, test.expectedNamedGroups, actualNamedGroups)
 		})
diff --git a/syft/formats/common/spdxhelpers/source_info_test.go b/syft/formats/common/spdxhelpers/source_info_test.go
index 75f3987ba..6de1f461a 100644
--- a/syft/formats/common/spdxhelpers/source_info_test.go
+++ b/syft/formats/common/spdxhelpers/source_info_test.go
@@ -183,6 +183,14 @@ func Test_SourceInfo(t *testing.T) {
 				"from cabal or stack manifest files",
 			},
 		},
+		{
+			input: pkg.Package{
+				Type: pkg.BinaryPkg,
+			},
+			expected: []string{
+				"acquired package info from the following paths",
+			},
+		},
 	}
 	var pkgTypes []pkg.Type
 	for _, test := range tests {
diff --git a/syft/pkg/binary_metadata.go b/syft/pkg/binary_metadata.go
new file mode 100644
index 000000000..0fd1413c2
--- /dev/null
+++ b/syft/pkg/binary_metadata.go
@@ -0,0 +1,8 @@
+package pkg
+
+// BinaryMetadata records which classifier identified a binary package and the paths of the file it was found in.
+type BinaryMetadata struct {
+	Classifier  string
+	RealPath    string
+	VirtualPath string
+}
diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go
index 1e14ef86a..79d7dc0bb 100644
--- a/syft/pkg/cataloger/catalog.go
+++ b/syft/pkg/cataloger/catalog.go
@@ -66,7 +66,8 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ..
 
 		for _, p := range packages {
 			// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
-			p.CPEs = cpe.Generate(p)
+			// we might have binary classified CPE already with the package so we want to append here
+			p.CPEs = append(p.CPEs, cpe.Generate(p)...)
 
 			// generate PURL (note: this is excluded from package ID, so is safe to mutate)
 			p.PURL = pkg.URL(p, release)
@@ -85,7 +86,6 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ..
 			} else {
 				allRelationships = append(allRelationships, owningRelationships...)
 			}
-
 			// add to catalog
 			catalog.Add(p)
 		}
diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go
index dd704695f..452b326eb 100644
--- a/syft/pkg/cataloger/cataloger.go
+++ b/syft/pkg/cataloger/cataloger.go
@@ -39,6 +39,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
 		python.NewPythonPackageCataloger(),
 		php.NewPHPComposerInstalledCataloger(),
 		javascript.NewJavascriptPackageCataloger(),
+		javascript.NewNodeBinaryCataloger(),
 		deb.NewDpkgdbCataloger(),
 		rpm.NewRpmdbCataloger(),
 		java.NewJavaCataloger(cfg.Java()),
@@ -58,6 +59,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
 		python.NewPythonPackageCataloger(),
 		php.NewPHPComposerLockCataloger(),
 		javascript.NewJavascriptLockCataloger(),
+		javascript.NewNodeBinaryCataloger(),
 		deb.NewDpkgdbCataloger(),
 		rpm.NewRpmdbCataloger(),
 		rpm.NewFileCataloger(),
@@ -86,6 +88,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
 		python.NewPythonPackageCataloger(),
 		javascript.NewJavascriptLockCataloger(),
 		javascript.NewJavascriptPackageCataloger(),
+		javascript.NewNodeBinaryCataloger(),
 		deb.NewDpkgdbCataloger(),
 		rpm.NewRpmdbCataloger(),
 		rpm.NewFileCataloger(),
diff --git a/syft/pkg/cataloger/common/cpe/generate.go b/syft/pkg/cataloger/common/cpe/generate.go
index 5de119b54..afce232da 100644
--- a/syft/pkg/cataloger/common/cpe/generate.go
+++ b/syft/pkg/cataloger/common/cpe/generate.go
@@ -70,6 +70,11 @@ func candidateVendors(p pkg.Package) []string {
 	vendors := newFieldCandidateSet(candidateProducts(p)...)
 
 	switch p.Language {
+	case pkg.JavaScript:
+		// for JavaScript if we find node.js as a package then the vendor is "nodejs"
+		if p.Name == "node.js" {
+			vendors.addValue("nodejs")
+		}
 	case pkg.Ruby:
 		vendors.addValue("ruby-lang")
 	case pkg.Go:
diff --git a/syft/pkg/cataloger/generic/classifier.go b/syft/pkg/cataloger/generic/classifier.go
new file mode 100644
index 000000000..8a557fff8
--- /dev/null
+++ b/syft/pkg/cataloger/generic/classifier.go
@@ -0,0 +1,92 @@
+package generic
+
+import (
+	"fmt"
+	"io"
+	"path"
+	"regexp"
+
+	"github.com/anchore/syft/internal"
+	"github.com/anchore/syft/syft/artifact"
+	"github.com/anchore/syft/syft/file"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
+	"github.com/anchore/syft/syft/source"
+)
+
+// Classifier is a generic package classifier that can be used to match a package definition
+// to a file that meets the given content criteria of the EvidencePatterns.
+type Classifier struct {
+	// Package is recorded as the classifier name in the metadata of every package this classifier produces.
+	Package string
+	// FilepathPatterns is a list of regular expressions that will be used to match against the file path of a given
+	// source location. If any of the patterns match, the file will be considered a candidate for parsing.
+	// If no patterns are provided, the reader is automatically considered a candidate.
+	FilepathPatterns []*regexp.Regexp
+	// EvidencePatterns is a list of regular expressions that will be used to match against the file contents of a
+	// given file in the source location. The first pattern that matches produces the package, and its "version"
+	// named capture group (if any) becomes the package version.
+	EvidencePatterns []*regexp.Regexp
+	// CPEs are the CPEs attached to every package this classifier produces.
+	CPEs []pkg.CPE
+}
+
+// Examine checks the file behind reader against the classifier. It returns an error when FilepathPatterns are
+// configured and none of them match the location, or when the file contents cannot be read. When no evidence
+// pattern matches the contents it returns a nil package together with a nil error, so callers must check the
+// package for nil before using it. The relationship result is always nil.
+func (c Classifier) Examine(reader source.LocationReadCloser) (p *pkg.Package, r *artifact.Relationship, err error) {
+	doesFilepathMatch := true
+	if len(c.FilepathPatterns) > 0 {
+		doesFilepathMatch, _ = file.FilepathMatches(c.FilepathPatterns, reader.Location)
+	}
+
+	if !doesFilepathMatch {
+		return nil, nil, fmt.Errorf("location: %s did not match any patterns for package=%q", reader.Location, c.Package)
+	}
+
+	contents, err := getContents(reader)
+	if err != nil {
+		return nil, nil, fmt.Errorf("unable to get read contents for file: %w", err)
+	}
+
+	for _, pattern := range c.EvidencePatterns {
+		if !pattern.Match(contents) {
+			continue
+		}
+
+		matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents))
+		// the first matching pattern wins; the CPEs are copied so that callers appending to the package's CPEs
+		// can never write into the classifier's shared slice
+		return &pkg.Package{
+			Name:         path.Base(reader.VirtualPath),
+			Version:      matchMetadata["version"],
+			Language:     pkg.Binary,
+			Locations:    source.NewLocationSet(reader.Location),
+			Type:         pkg.BinaryPkg,
+			CPEs:         append([]pkg.CPE(nil), c.CPEs...),
+			MetadataType: pkg.BinaryMetadataType,
+			Metadata: pkg.BinaryMetadata{
+				Classifier:  c.Package,
+				RealPath:    reader.RealPath,
+				VirtualPath: reader.VirtualPath,
+			},
+		}, nil, nil
+	}
+	return nil, nil, nil
+}
+
+// getContents reads the entire file behind reader into memory.
+func getContents(reader source.LocationReadCloser) ([]byte, error) {
+	unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
+	if err != nil {
+		return nil, fmt.Errorf("unable to get union reader for file: %w", err)
+	}
+
+	contents, err := io.ReadAll(unionReader)
+	if err != nil {
+		return nil, fmt.Errorf("unable to get contents for file: %w", err)
+	}
+
+	return contents, nil
+}
diff --git a/syft/pkg/cataloger/javascript/cataloger.go b/syft/pkg/cataloger/javascript/cataloger.go
index cefbaccb5..150a537f2 100644
--- a/syft/pkg/cataloger/javascript/cataloger.go
+++ b/syft/pkg/cataloger/javascript/cataloger.go
@@ -9,9 +9,11 @@ import (
 	"path"
 	"strings"
 
+	"github.com/anchore/syft/internal"
 	"github.com/anchore/syft/internal/log"
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/pkg/cataloger/common"
+	"github.com/anchore/syft/syft/pkg/cataloger/generic"
 	"github.com/anchore/syft/syft/source"
 )
 
@@ -35,6 +37,12 @@ func NewJavascriptLockCataloger() *common.GenericCataloger {
 	return common.NewGenericCataloger(nil, globParsers, "javascript-lock-cataloger", addLicenses)
 }
 
+// NewNodeBinaryCataloger returns a new cataloger that searches files with executable MIME types for node.js binaries.
+func NewNodeBinaryCataloger() *generic.Cataloger {
+	return generic.NewCataloger("node-binary-cataloger").
+		WithParserByMimeTypes(parseNodeBinary, internal.ExecutableMIMETypeSet.List()...)
+}
+
 func addLicenses(resolver source.FileResolver, location source.Location, p *pkg.Package) error {
 	dir := path.Dir(location.RealPath)
 	pkgPath := []string{dir, "node_modules"}
diff --git a/syft/pkg/cataloger/javascript/parse_node_binary.go b/syft/pkg/cataloger/javascript/parse_node_binary.go
new file mode 100644
index 000000000..e7933fcbd
--- /dev/null
+++ b/syft/pkg/cataloger/javascript/parse_node_binary.go
@@ -0,0 +1,50 @@
+package javascript
+
+import (
+	"regexp"
+
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/syft/artifact"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/generic"
+	"github.com/anchore/syft/syft/source"
+)
+
+// nodeClassifier matches files named "node" whose contents embed a "node.js/vX.Y.Z" version string.
+var nodeClassifier = generic.Classifier{
+	Package: "node.js", // Note: this purposely matches the "node.js" string to aid nvd vuln matching
+	FilepathPatterns: []*regexp.Regexp{
+		// note: should we just parse all files resolved with executable mimetypes
+		// regexp that matches node binary
+		regexp.MustCompile(`(.*/|^)node$`),
+	},
+	EvidencePatterns: []*regexp.Regexp{
+		// regex that matches node.js/vx.y.z
+		regexp.MustCompile(`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
+	},
+	CPEs: []pkg.CPE{
+		pkg.MustCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"),
+	},
+}
+
+// parseNodeBinary reports the node.js package found in the given executable, if any. Files the classifier
+// rejects or cannot read, and files without version evidence, yield no packages and no error.
+func parseNodeBinary(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
+	p, _, err := nodeClassifier.Examine(reader)
+	if err != nil {
+		// NOTE(review): Trace is handed printf-style verbs here; confirm it formats them
+		log.Trace("failed to find node.js package: %+v", err)
+		return nil, nil, nil // we can silently fail here to reduce warning noise
+	}
+
+	// Examine returns a nil package (with a nil error) when no evidence pattern matched; stop before touching p
+	if p == nil {
+		return nil, nil, nil
+	}
+
+	// TODO add node specific metadata to the packages to help with vulnerability matching
+	p.Language = pkg.JavaScript
+	// set the ID last, once every field is populated (presumably the ID is derived from the package fields)
+	p.SetID()
+	return []pkg.Package{*p}, nil, nil
+}
diff --git a/syft/pkg/language.go b/syft/pkg/language.go
index 8e3d6193c..9e8054d60 100644
--- a/syft/pkg/language.go
+++ b/syft/pkg/language.go
@@ -24,6 +24,7 @@ const (
 	Swift           Language = "swift"
 	CPP             Language = "c++"
 	Haskell         Language = "haskell"
+	Binary          Language = "binary"
 )
 
 // AllLanguages is a set of all programming languages detected by syft.
diff --git a/syft/pkg/metadata.go b/syft/pkg/metadata.go
index 6a93f7872..dcf2c7c6a 100644
--- a/syft/pkg/metadata.go
+++ b/syft/pkg/metadata.go
@@ -13,6 +13,7 @@ const (
 	UnknownMetadataType          MetadataType = "UnknownMetadata"
 	ApkMetadataType              MetadataType = "ApkMetadata"
 	AlpmMetadataType             MetadataType = "AlpmMetadata"
+	BinaryMetadataType           MetadataType = "BinaryMetadata"
 	DpkgMetadataType             MetadataType = "DpkgMetadata"
 	GemMetadataType              MetadataType = "GemMetadata"
 	JavaMetadataType             MetadataType = "JavaMetadata"
@@ -35,6 +36,7 @@ const (
 var AllMetadataTypes = []MetadataType{
 	ApkMetadataType,
 	AlpmMetadataType,
+	BinaryMetadataType,
 	DpkgMetadataType,
 	GemMetadataType,
 	JavaMetadataType,
@@ -57,6 +59,7 @@ var AllMetadataTypes = []MetadataType{
 var MetadataTypeByName = map[MetadataType]reflect.Type{
 	ApkMetadataType:              reflect.TypeOf(ApkMetadata{}),
 	AlpmMetadataType:             reflect.TypeOf(AlpmMetadata{}),
+	BinaryMetadataType:           reflect.TypeOf(BinaryMetadata{}),
 	DpkgMetadataType:             reflect.TypeOf(DpkgMetadata{}),
 	GemMetadataType:              reflect.TypeOf(GemMetadata{}),
 	JavaMetadataType:             reflect.TypeOf(JavaMetadata{}),
diff --git a/syft/pkg/type.go b/syft/pkg/type.go
index 6dafed472..bedb6c85c 100644
--- a/syft/pkg/type.go
+++ b/syft/pkg/type.go
@@ -8,6 +8,7 @@ type Type string
 const (
 	// the full set of supported packages
 	UnknownPkg       Type = "UnknownPackage"
+	BinaryPkg        Type = "binary"
 	ApkPkg           Type = "apk"
 	AlpmPkg          Type = "alpm"
 	GemPkg           Type = "gem"
@@ -33,6 +34,7 @@ const (
 var AllPkgs = []Type{
 	ApkPkg,
 	AlpmPkg,
+	BinaryPkg,
 	GemPkg,
 	DebPkg,
 	RpmPkg,
diff --git a/syft/pkg/type_test.go b/syft/pkg/type_test.go
index 5902185dc..e196ac508 100644
--- a/syft/pkg/type_test.go
+++ b/syft/pkg/type_test.go
@@ -87,10 +87,12 @@ func TestTypeFromPURL(t *testing.T) {
 		expectedTypes.Add(string(ty))
 	}
 
-	// testing microsoft packages and jenkins-plugins is not valid for purl at this time
+	// testing microsoft packages and jenkins-plugins and custom binary type
+	// is not valid for purl at this time
 	expectedTypes.Remove(string(KbPkg))
 	expectedTypes.Remove(string(JenkinsPluginPkg))
 	expectedTypes.Remove(string(PortagePkg))
+	expectedTypes.Remove(string(BinaryPkg))
 
 	for _, test := range tests {
 		t.Run(string(test.expected), func(t *testing.T) {
diff --git a/syft/pkg/url_test.go b/syft/pkg/url_test.go
index 551b6b944..f3b98e854 100644
--- a/syft/pkg/url_test.go
+++ b/syft/pkg/url_test.go
@@ -151,6 +151,7 @@ func TestPackageURL(t *testing.T) {
 	expectedTypes.Remove(string(DebPkg))
 	expectedTypes.Remove(string(GoModulePkg))
 	expectedTypes.Remove(string(HackagePkg))
+	expectedTypes.Remove(string(BinaryPkg))
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
diff --git a/test/cli/packages_cmd_test.go b/test/cli/packages_cmd_test.go
index 2170c6bc5..a3428ac33 100644
--- a/test/cli/packages_cmd_test.go
+++ b/test/cli/packages_cmd_test.go
@@ -10,6 +10,7 @@ import (
 func TestPackagesCmdFlags(t *testing.T) {
 	hiddenPackagesImage := "docker-archive:" + getFixtureImage(t, "image-hidden-packages")
 	coverageImage := "docker-archive:" + getFixtureImage(t, "image-pkg-coverage")
+	nodeBinaryImage := "docker-archive:" + getFixtureImage(t, "image-node-binary")
 	//badBinariesImage := "docker-archive:" + getFixtureImage(t, "image-bad-binaries")
 	tmp := t.TempDir() + "/"
 
@@ -142,6 +143,15 @@ func TestPackagesCmdFlags(t *testing.T) {
 				assertSuccessfulReturnCode,
 			},
 		},
+		{
+			name: "catalog-node-js-binary",
+			args: []string{"packages", "-o", "json", nodeBinaryImage},
+			assertions: []traitAssertion{
+				assertJsonReport,
+				assertInOutput("node.js"),
+				assertSuccessfulReturnCode,
+			},
+		},
 		{
 			name: "responds-to-package-cataloger-search-options",
 			args: []string{"packages", "-vv"},
diff --git a/test/cli/test-fixtures/image-node-binary/Dockerfile b/test/cli/test-fixtures/image-node-binary/Dockerfile
new file mode 100644
index 000000000..6bda80cab
--- /dev/null
+++ b/test/cli/test-fixtures/image-node-binary/Dockerfile
@@ -0,0 +1 @@
+FROM node:19-alpine3.15
\ No newline at end of file
diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go
index 71f5e0c8e..8a54b1b03 100644
--- a/test/integration/catalog_packages_test.go
+++ b/test/integration/catalog_packages_test.go
@@ -85,6 +85,7 @@ func TestPkgCoverageImage(t *testing.T) {
 	definedPkgs.Remove(string(pkg.CocoapodsPkg))
 	definedPkgs.Remove(string(pkg.ConanPkg))
 	definedPkgs.Remove(string(pkg.HackagePkg))
+	definedPkgs.Remove(string(pkg.BinaryPkg))
 
 	var cases []testCase
 	cases = append(cases, commonTestCases...)
@@ -206,6 +207,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
 	observedLanguages.Remove(pkg.UnknownLanguage.String())
 	definedLanguages.Remove(pkg.UnknownLanguage.String())
 	observedPkgs.Remove(string(pkg.UnknownPkg))
+	definedPkgs.Remove(string(pkg.BinaryPkg))
 	definedPkgs.Remove(string(pkg.UnknownPkg))
 
 	// for directory scans we should not expect to see any of the following package types