feat: add ability to retrieve remote licenses for yarn.lock (#2338)

---------

Signed-off-by: Colm O hEigeartaigh <coheigea@apache.org>
Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
Co-authored-by: Christopher Phillips <christopher.phillips@anchore.com>
This commit is contained in:
Colm O hEigeartaigh 2023-12-05 18:38:28 +00:00 committed by GitHub
parent 23778de112
commit 16dee41b4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 334 additions and 17 deletions

View File

@ -656,6 +656,10 @@ python:
# when given an arbitrary constraint will be used (even if that version may not be available/published).
guess-unpinned-requirements: false
javascript:
search-remote-licenses: false
npm-base-url: "https://registry.npmjs.org"
file-contents:
cataloger:
# enable/disable cataloging of file contents

View File

@ -16,6 +16,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger"
golangCataloger "github.com/anchore/syft/syft/pkg/cataloger/golang"
javaCataloger "github.com/anchore/syft/syft/pkg/cataloger/java"
javascriptCataloger "github.com/anchore/syft/syft/pkg/cataloger/javascript"
"github.com/anchore/syft/syft/pkg/cataloger/kernel"
pythonCataloger "github.com/anchore/syft/syft/pkg/cataloger/python"
"github.com/anchore/syft/syft/source"
@ -26,6 +27,7 @@ type Catalog struct {
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"`
Java java `yaml:"java" json:"java" mapstructure:"java"`
Javascript javascript `yaml:"javascript" json:"javascript" mapstructure:"javascript"`
LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"`
Python python `yaml:"python" json:"python" mapstructure:"python"`
FileMetadata fileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
@ -145,6 +147,9 @@ func (cfg Catalog) ToCatalogerConfig() cataloger.Config {
IncludeUnindexedArchives: cfg.Package.SearchUnindexedArchives,
},
cfg.Java.MaxParentRecursiveDepth),
Javascript: javascriptCataloger.DefaultCatalogerConfig().
WithSearchRemoteLicenses(cfg.Javascript.SearchRemoteLicenses).
WithNpmBaseURL(cfg.Javascript.NpmBaseURL),
Python: pythonCataloger.CatalogerConfig{
GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements,
},

View File

@ -0,0 +1,6 @@
package options
// javascript holds the user-facing configuration options for the JavaScript
// catalogers. Catalog.ToCatalogerConfig copies both values into the
// javascript cataloger's CatalogerConfig.
type javascript struct {
// SearchRemoteLicenses is populated from the `search-remote-licenses` key.
SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
// NpmBaseURL is populated from the `npm-base-url` key.
NpmBaseURL string `json:"npm-base-url" yaml:"npm-base-url" mapstructure:"npm-base-url"`
}

View File

@ -85,7 +85,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
java.NewGradleLockfileCataloger(),
java.NewPomCataloger(),
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(),
javascript.NewLockCataloger(cfg.Javascript),
nix.NewStoreCataloger(),
php.NewComposerLockCataloger(),
gentoo.NewPortageCataloger(),
@ -124,7 +124,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
java.NewGradleLockfileCataloger(),
java.NewPomCataloger(),
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(),
javascript.NewLockCataloger(cfg.Javascript),
javascript.NewPackageCataloger(),
kernel.NewLinuxKernelCataloger(cfg.LinuxKernel),
nix.NewStoreCataloger(),

View File

@ -4,6 +4,7 @@ import (
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/pkg/cataloger/golang"
"github.com/anchore/syft/syft/pkg/cataloger/java"
"github.com/anchore/syft/syft/pkg/cataloger/javascript"
"github.com/anchore/syft/syft/pkg/cataloger/kernel"
"github.com/anchore/syft/syft/pkg/cataloger/python"
)
@ -15,6 +16,7 @@ type Config struct {
LinuxKernel kernel.LinuxKernelCatalogerConfig
Python python.CatalogerConfig
Java java.ArchiveCatalogerConfig
Javascript javascript.CatalogerConfig
Catalogers []string
Parallelism int
ExcludeBinaryOverlapByOwnership bool
@ -27,6 +29,7 @@ func DefaultConfig() Config {
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
Java: java.DefaultArchiveCatalogerConfig(),
Javascript: javascript.DefaultCatalogerConfig(),
ExcludeBinaryOverlapByOwnership: true,
}
}

View File

@ -15,9 +15,10 @@ func NewPackageCataloger() pkg.Cataloger {
}
// NewLockCataloger returns a new cataloger object for NPM (and NPM-adjacent, such as yarn) lock files.
func NewLockCataloger() pkg.Cataloger {
func NewLockCataloger(cfg CatalogerConfig) pkg.Cataloger {
yarnLockAdapter := newGenericYarnLockAdapter(cfg)
return generic.NewCataloger("javascript-lock-cataloger").
WithParserByGlobs(parsePackageLock, "**/package-lock.json").
WithParserByGlobs(parseYarnLock, "**/yarn.lock").
WithParserByGlobs(yarnLockAdapter.parseYarnLock, "**/yarn.lock").
WithParserByGlobs(parsePnpmLock, "**/pnpm-lock.yaml")
}

View File

@ -132,7 +132,7 @@ func Test_JavascriptCataloger(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, "test-fixtures/pkg-lock").
Expects(expectedPkgs, nil).
TestCataloger(t, NewLockCataloger())
TestCataloger(t, NewLockCataloger(CatalogerConfig{}))
}
@ -183,7 +183,7 @@ func Test_LockCataloger_Globs(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected).
TestCataloger(t, NewLockCataloger())
TestCataloger(t, NewLockCataloger(CatalogerConfig{}))
})
}
}

View File

@ -0,0 +1,27 @@
package javascript
// npmBaseURL is the registry endpoint used when no override is configured.
const npmBaseURL = "https://registry.npmjs.org"

// CatalogerConfig carries the settings of the JavaScript lock-file cataloger.
type CatalogerConfig struct {
	// searchRemoteLicenses turns on license lookups against the npm registry.
	searchRemoteLicenses bool
	// npmBaseURL is the base URL those lookups are sent to.
	npmBaseURL string
}

// DefaultCatalogerConfig returns a configuration with remote license lookups
// disabled and the registry pointed at the default npm base URL.
func DefaultCatalogerConfig() CatalogerConfig {
	var cfg CatalogerConfig // searchRemoteLicenses keeps its zero value (false)
	cfg.npmBaseURL = npmBaseURL
	return cfg
}

// WithSearchRemoteLicenses returns a copy of the configuration with remote
// license lookups switched on or off; the receiver is left untouched.
func (c CatalogerConfig) WithSearchRemoteLicenses(input bool) CatalogerConfig {
	updated := c
	updated.searchRemoteLicenses = input
	return updated
}

// WithNpmBaseURL returns a copy of the configuration that uses input as the
// registry base URL. An empty input keeps the currently configured URL.
func (c CatalogerConfig) WithNpmBaseURL(input string) CatalogerConfig {
	if input == "" {
		return c
	}
	updated := c
	updated.npmBaseURL = input
	return updated
}

View File

@ -2,9 +2,13 @@ package javascript
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"path"
"strings"
"time"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log"
@ -106,13 +110,27 @@ func newPnpmPackage(resolver file.Resolver, location file.Location, name, versio
)
}
func newYarnLockPackage(resolver file.Resolver, location file.Location, name, version string) pkg.Package {
func newYarnLockPackage(cfg CatalogerConfig, resolver file.Resolver, location file.Location, name, version string) pkg.Package {
var licenseSet pkg.LicenseSet
if cfg.searchRemoteLicenses {
license, err := getLicenseFromNpmRegistry(cfg.npmBaseURL, name, version)
if err == nil && license != "" {
licenses := pkg.NewLicensesFromValues(license)
licenseSet = pkg.NewLicenseSet(licenses...)
}
if err != nil {
log.Warnf("unable to extract licenses from javascript yarn.lock for package %s:%s: %+v", name, version, err)
}
}
return finalizeLockPkg(
resolver,
location,
pkg.Package{
Name: name,
Version: version,
Licenses: licenseSet,
Locations: file.NewLocationSet(location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
PURL: packageURL(name, version),
Language: pkg.JavaScript,
@ -121,6 +139,63 @@ func newYarnLockPackage(resolver file.Resolver, location file.Location, name, ve
)
}
// formatNpmRegistryURL builds the registry URL for a single package version
// by joining packageName and version onto baseURL as path segments, i.e.
// <baseURL>/<packageName>/<version>.
//
// It returns an empty string and a wrapped error when the URL cannot be
// built (for example when baseURL does not parse).
func formatNpmRegistryURL(baseURL, packageName, version string) (requestURL string, err error) {
	requestURL, err = url.JoinPath(baseURL, packageName, version)
	if err != nil {
		// keep name and version separated so the failing package is readable in logs
		return "", fmt.Errorf("unable to format npm request for pkg:version %s:%s; %w", packageName, version, err)
	}
	return requestURL, nil
}
// getLicenseFromNpmRegistry fetches the manifest of packageName at version
// from the npm-compatible registry rooted at baseURL and returns the value of
// its top-level "license" field.
//
// An error is returned when the request URL cannot be built, the request
// fails or times out (10s), the registry answers with a non-200 status, or
// the response body is not a JSON document with a string "license" field.
// A manifest without a "license" field yields an empty string and no error.
func getLicenseFromNpmRegistry(baseURL, packageName, version string) (string, error) {
	// resolves to <baseURL>/<packageName>/<version>, e.g. https://registry.npmjs.org/<name>/<version>
	requestURL, err := formatNpmRegistryURL(baseURL, packageName, version)
	if err != nil {
		// the helper's error already names the package and version
		return "", err
	}
	log.Tracef("trying to fetch remote package %s", requestURL)

	npmRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return "", fmt.Errorf("unable to format remote request: %w", err)
	}

	httpClient := &http.Client{
		Timeout: time.Second * 10,
	}

	resp, err := httpClient.Do(npmRequest)
	if err != nil {
		return "", fmt.Errorf("unable to get package from npm registry: %w", err)
	}
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Errorf("unable to close body: %+v", err)
		}
	}()

	// read the body even for error statuses so the connection is fully drained
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("unable to read package from npm registry: %w", err)
	}

	// an error response must not be mistaken for a manifest without a license
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unable to get package from npm registry: unexpected status %q for %s", resp.Status, requestURL)
	}

	// NOTE(review): only a string-valued "license" field is decoded; a manifest
	// whose field has another JSON shape fails here and is reported as an
	// error — confirm whether such manifests need to be supported.
	var manifest struct {
		License string `json:"license"`
	}
	if err := json.Unmarshal(body, &manifest); err != nil {
		return "", fmt.Errorf("unable to parse license from npm registry: %w", err)
	}

	log.Tracef("Retrieved License: %s", manifest.License)

	return manifest.License, nil
}
func finalizeLockPkg(resolver file.Resolver, location file.Location, p pkg.Package) pkg.Package {
licenseCandidate := addLicenses(p.Name, resolver, location)
p.Licenses.Add(pkg.NewLicensesFromLocation(location, licenseCandidate...)...)

View File

@ -13,9 +13,6 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// integrity check
var _ generic.Parser = parseYarnLock
var (
// packageNameExp matches the name of the dependency in yarn.lock
// including scope/namespace prefix if found.
@ -43,7 +40,17 @@ const (
noVersion = ""
)
func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// genericYarnLockAdapter carries the cataloger configuration alongside the
// yarn.lock parser, so the parser method can read the configuration while
// still being registered as an ordinary parser function.
type genericYarnLockAdapter struct {
	cfg CatalogerConfig
}

// newGenericYarnLockAdapter returns an adapter whose parser uses cfg.
func newGenericYarnLockAdapter(cfg CatalogerConfig) genericYarnLockAdapter {
	var adapter genericYarnLockAdapter
	adapter.cfg = cfg
	return adapter
}
func (a genericYarnLockAdapter) parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// in the case we find yarn.lock files in the node_modules directories, skip those
// as the whole purpose of the lock file is for the specific dependencies of the project
if pathContainsNodeModulesDirectory(reader.Path()) {
@ -62,7 +69,7 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L
if packageName := findPackageName(line); packageName != noPackage {
// When we find a new package, check if we have unsaved identifiers
if currentPackage != noPackage && currentVersion != noVersion && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, currentPackage, currentVersion))
pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, currentPackage, currentVersion))
parsedPackages.Add(currentPackage + "@" + currentVersion)
}
@ -70,7 +77,7 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L
} else if version := findPackageVersion(line); version != noVersion {
currentVersion = version
} else if packageName, version := findPackageAndVersion(line); packageName != noPackage && version != noVersion && !parsedPackages.Has(packageName+"@"+version) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, packageName, version))
pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, packageName, version))
parsedPackages.Add(packageName + "@" + version)
// Cleanup to indicate no unsaved identifiers
@ -81,7 +88,7 @@ func parseYarnLock(resolver file.Resolver, _ *generic.Environment, reader file.L
// check if we have valid unsaved data after end-of-file has reached
if currentPackage != noPackage && currentVersion != noVersion && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
pkgs = append(pkgs, newYarnLockPackage(resolver, reader.Location, currentPackage, currentVersion))
pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, currentPackage, currentVersion))
parsedPackages.Add(currentPackage + "@" + currentVersion)
}

View File

@ -1,6 +1,10 @@
package javascript
import (
"io"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/stretchr/testify/assert"
@ -91,8 +95,8 @@ func TestParseYarnBerry(t *testing.T) {
},
}
pkgtest.TestFileParser(t, fixture, parseYarnLock, expectedPkgs, expectedRelationships)
adapter := newGenericYarnLockAdapter(CatalogerConfig{})
pkgtest.TestFileParser(t, fixture, adapter.parseYarnLock, expectedPkgs, expectedRelationships)
}
func TestParseYarnLock(t *testing.T) {
@ -177,8 +181,62 @@ func TestParseYarnLock(t *testing.T) {
},
}
pkgtest.TestFileParser(t, fixture, parseYarnLock, expectedPkgs, expectedRelationships)
adapter := newGenericYarnLockAdapter(CatalogerConfig{})
pkgtest.TestFileParser(t, fixture, adapter.parseYarnLock, expectedPkgs, expectedRelationships)
}
// handlerPath pairs a request path with the handler that should answer it.
// Tests register each pair on the mock server's mux via mux.HandleFunc.
type handlerPath struct {
// path is the pattern the handler is registered under.
path string
// handler produces the mock response for requests matching path.
handler func(w http.ResponseWriter, r *http.Request)
}
// TestSearchYarnForLicenses parses a yarn.lock fixture with remote license
// search enabled and checks that the license served by a mock registry ends
// up on the resulting package.
func TestSearchYarnForLicenses(t *testing.T) {
	fixture := "test-fixtures/yarn-remote/yarn.lock"
	locations := file.NewLocationSet(file.NewLocation(fixture))

	tests := []struct {
		name             string
		config           CatalogerConfig
		requestHandlers  []handlerPath
		expectedPackages []pkg.Package
	}{
		{
			name:   "search remote licenses returns the expected licenses when search is set to true",
			config: CatalogerConfig{searchRemoteLicenses: true},
			requestHandlers: []handlerPath{
				{
					// path requested for @babel/code-frame@7.10.4: <base URL>/@babel/code-frame/7.10.4
					path:    "/@babel/code-frame/7.10.4",
					handler: generateMockNPMHandler("test-fixtures/yarn-remote/registry_response.json"),
				},
			},
			expectedPackages: []pkg.Package{
				{
					Name:      "@babel/code-frame",
					Version:   "7.10.4",
					Locations: locations,
					PURL:      "pkg:npm/%40babel/code-frame@7.10.4",
					Licenses:  pkg.NewLicenseSet(pkg.NewLicense("MIT")),
					Language:  pkg.JavaScript,
					Type:      pkg.NpmPkg,
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Every case gets its own mock server: registering the same path
			// twice on one mux panics, so a shared mux would break as soon as
			// two cases served the same package.
			mux, serverURL, teardown := setup()
			defer teardown()

			for _, h := range tc.requestHandlers {
				mux.HandleFunc(h.path, h.handler)
			}

			// point the cataloger at the mock server without touching the table entry
			cfg := tc.config
			cfg.npmBaseURL = serverURL

			adapter := newGenericYarnLockAdapter(cfg)
			pkgtest.TestFileParser(t, fixture, adapter.parseYarnLock, tc.expectedPackages, nil)
		})
	}
}
func TestParseYarnFindPackageNames(t *testing.T) {
@ -336,3 +394,40 @@ func TestParseYarnFindPackageVersions(t *testing.T) {
})
}
}
// generateMockNPMHandler returns an HTTP handler that answers every request
// with the contents of the file at responseFixture.
//
// The fixture is opened on each request. If it cannot be opened the handler
// replies with 500 and the error text; otherwise the file is streamed with
// the implicit 200 status.
func generateMockNPMHandler(responseFixture string) func(w http.ResponseWriter, r *http.Request) {
	return func(w http.ResponseWriter, _ *http.Request) {
		// Open the fixture before anything is written: once a status has been
		// sent it cannot be changed, so a failure here must come first to be
		// reported as a 500.
		fixture, err := os.Open(responseFixture)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		defer fixture.Close()

		// The first write sends the 200 status implicitly.
		if _, err := io.Copy(w, fixture); err != nil {
			// only takes effect as a 500 if nothing has been written yet
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	}
}
// setup starts a local HTTP test server that stands in for the npm registry.
// Tests register handlers on the returned mux to simulate the expected
// request/response structure, and inject serverURL into the cataloger config
// so that requests are sent to the test server. teardown shuts the server
// down and must be called when the test is done.
func setup() (mux *http.ServeMux, serverURL string, teardown func()) {
	// mux is the HTTP request multiplexer served by the test server.
	mux = http.NewServeMux()

	// server is a test HTTP server used to provide mock registry responses.
	server := httptest.NewServer(mux)

	return mux, server.URL, server.Close
}

View File

@ -0,0 +1,84 @@
{
"name": "@babel/code-frame",
"version": "7.10.4",
"description": "Generate errors that contain a code frame that point to source locations.",
"author": {
"name": "Sebastian McKenzie",
"email": "sebmck@gmail.com"
},
"homepage": "https://babeljs.io/",
"license": "MIT",
"publishConfig": {
"access": "public"
},
"repository": {
"type": "git",
"url": "git+https://github.com/babel/babel.git",
"directory": "packages/babel-code-frame"
},
"main": "lib/index.js",
"dependencies": {
"@babel/highlight": "^7.10.4"
},
"devDependencies": {
"chalk": "^2.0.0",
"strip-ansi": "^4.0.0"
},
"gitHead": "7fd40d86a0d03ff0e9c3ea16b29689945433d4df",
"bugs": {
"url": "https://github.com/babel/babel/issues"
},
"_id": "@babel/code-frame@7.10.4",
"_nodeVersion": "14.4.0",
"_npmVersion": "lerna/3.19.0/node@v14.4.0+x64 (darwin)",
"dist": {
"integrity": "sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==",
"shasum": "168da1a36e90da68ae8d49c0f1b48c7c6249213a",
"tarball": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.10.4.tgz",
"fileCount": 4,
"unpackedSize": 7723,
"npm-signature": "-----BEGIN PGP SIGNATURE-----\r\nVersion: OpenPGP.js v3.0.4\r\nComment: https://openpgpjs.org\r\n\r\nwsFcBAEBCAAQBQJe+zovCRA9TVsSAnZWagAAe8cQAKMtKF7FJx92Re+ol+4B\no0f3yGdzxWUhl8B+e8eXgjYcetMe1MZpG1oQF5ocYo45mZ+ASWMzfp1caT1p\nqt3F5rtADrL9int8ti7ICrWkIGQ3ccnrTxmtx1y9NRGANeARYjlB538xg0xy\n2Gin+2NchK2gxkxeX0nPH7LOcLlJpXecv1p2BK8gFZfabhM4fKfcMSSeljUp\ngzBGU/0CQp3++KsFAS/GMtgo6ZLw7wnHn0IVtDbwhA6A7hpDZar1Q5xsleui\nUTFIJCegGBx9exO1z0fYLGjOuvOdB5790fQnGsspbVTMwpImWpjspmuH1kWI\nTtk1ocnUnvEu1wNK2FMspUeKHNmOi5Jr6bkdGTxecXV4W/p9oEoPe3tHItL5\nbG9gNxY4IUkHeL93D86w9DtgaYtFSCYD6+sY7mQQQdhBrilR06AugBtQG3jP\nFpEsOLSn8vhYOQKv18CN//xaJM/uar40NTfZcQTn4VLXjUsuR4W+3eAv+qb+\nSKEpf8YAhgXJR5EFG1m3m4VHCp40SM1oSibh3/Ib74VZmlF/aq0VcPDs9tD2\no/MibYbQcP01cYxzpfObPXRczTH8TSl3scslcf7aVcLuhbyJtw8gblKnmXjo\njNaZrKjGKrpKRYStMBbCt8ILkL+OZTcDJcihy1x0v4oOWT43lrHnPntOAg7D\nqQbp\r\n=RhVa\r\n-----END PGP SIGNATURE-----\r\n",
"signatures": [
{
"keyid": "SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA",
"sig": "MEUCIQDSjBYOah3mnIxnAjEKv638MySMCxhZ0J9pexriecmdogIgAOaTRZg3UeVxrs8Khzr78+n4T+10hMn2Z9DRR7k5qEM="
}
]
},
"maintainers": [
{
"email": "daniel@tschinder.de",
"name": "danez"
},
{
"email": "bng412@gmail.com",
"name": "existentialism"
},
{
"email": "hi@henryzoo.com",
"name": "hzoo"
},
{
"email": "i@jhuang.me",
"name": "jlhwung"
},
{
"email": "loganfsmyth@gmail.com",
"name": "loganfsmyth"
},
{
"email": "nicolo.ribaudo@gmail.com",
"name": "nicolo-ribaudo"
}
],
"_npmUser": {
"name": "jlhwung",
"email": "i@jhuang.me"
},
"directories": {},
"_npmOperationalInternal": {
"host": "s3://npm-registry-packages",
"tmp": "tmp/code-frame_7.10.4_1593522734690_0.6416145193889038"
},
"_hasShrinkwrap": false
}

View File

@ -0,0 +1,10 @@
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
# yarn lockfile v1
"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.10.4":
version "7.10.4"
resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.10.4.tgz#168da1a36e90da68ae8d49c0f1b48c7c6249213a"
integrity sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==
dependencies:
"@babel/highlight" "^7.10.4"