Create single license scanner for all catalogers (#3348)

* add single license scanner instance

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* rename testing license scanner

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2024-10-21 12:17:12 -04:00 committed by GitHub
parent 14355aac21
commit e4e985b9b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 277 additions and 148 deletions

View File

@ -0,0 +1,18 @@
package licenses
import (
"context"
)
// licenseScannerKey is the unexported context key type under which the
// license scanner is stored; being a distinct type, it cannot collide with
// keys set by other packages.
type licenseScannerKey struct{}

// SetContextLicenseScanner returns a copy of ctx that carries the given
// Scanner, retrievable later with ContextLicenseScanner.
func SetContextLicenseScanner(ctx context.Context, s Scanner) context.Context {
	key := licenseScannerKey{}
	return context.WithValue(ctx, key, s)
}
// ContextLicenseScanner returns the Scanner stored in ctx by
// SetContextLicenseScanner. When ctx holds no Scanner under the license
// scanner key, a new default scanner is created and returned instead.
func ContextLicenseScanner(ctx context.Context) Scanner {
	stored, found := ctx.Value(licenseScannerKey{}).(Scanner)
	if !found {
		// nothing was injected: fall back to a freshly built default scanner
		return NewDefaultScanner()
	}
	return stored
}

View File

@ -1,45 +0,0 @@
package licenses
import (
"io"
"github.com/google/licensecheck"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
)
const (
coverageThreshold = 75
unknownLicenseType = "UNKNOWN"
)
// Parse scans the contents of a license file to attempt to determine the type of license it is.
// The reader is consumed in full and every match reported by the scan is returned as a
// license.Concluded license associated with location l. When the scan coverage is below
// coverageThreshold an empty (non-nil) slice is returned with a nil error.
func Parse(reader io.Reader, l file.Location) (licenses []pkg.License, err error) {
licenses = make([]pkg.License, 0)
contents, err := io.ReadAll(reader)
if err != nil {
return nil, err
}
// a new licensecheck scanner is built from the built-in license set on every call
scanner, err := licensecheck.NewScanner(licensecheck.BuiltinLicenses())
if err != nil {
return nil, err
}
cov := scanner.Scan(contents)
if cov.Percent < coverageThreshold {
// unknown or no licenses here?
return licenses, nil
}
// one license entry per match, all tied to the same location
for _, m := range cov.Match {
lic := pkg.NewLicenseFromLocations(m.ID, l)
lic.Type = license.Concluded
licenses = append(licenses, lic)
}
return licenses, nil
}

View File

@ -0,0 +1,68 @@
package licenses
import (
"context"
"io"
"github.com/google/licensecheck"
"github.com/anchore/syft/internal/log"
)
const coverageThreshold = 75 // determined by experimentation
// Scanner identifies licenses from raw content.
type Scanner interface {
// IdentifyLicenseIDs reads the given content and returns the IDs of the licenses found in it.
// The implementation in this file returns a nil slice and nil error when nothing qualifies.
IdentifyLicenseIDs(context.Context, io.Reader) ([]string, error)
}
var _ Scanner = (*scanner)(nil)
// scanner is the Scanner implementation that delegates to a scan function returning licensecheck coverage.
type scanner struct {
// minimum coverage percentage a scan result must reach before its matches are reported
coverageThreshold float64 // between 0 and 100
// scan function applied to the full content; when nil, IdentifyLicenseIDs reports no licenses
scanner func([]byte) licensecheck.Coverage
}
// NewDefaultScanner returns a scanner that uses a new instance of the default licensecheck package scanner.
//
// If the licensecheck scanner cannot be created, the error is logged at trace level and the
// returned Scanner has no scan function, so IdentifyLicenseIDs reports no licenses (nil, nil)
// rather than failing.
func NewDefaultScanner() Scanner {
	s, err := licensecheck.NewScanner(licensecheck.BuiltinLicenses())
	if err != nil {
		log.WithFields("error", err).Trace("unable to create default license scanner")
		// Do not bind s.Scan on this path: taking the method value from a nil scanner would
		// either panic right here or produce a non-nil func that panics when invoked, which
		// defeats the nil check performed by IdentifyLicenseIDs. Leave the scan func unset.
		return &scanner{
			coverageThreshold: coverageThreshold,
		}
	}

	return &scanner{
		coverageThreshold: coverageThreshold,
		scanner:           s.Scan,
	}
}
// TestingOnlyScanner builds a Scanner backed by the package-level licensecheck.Scan function,
// using the same coverage threshold as the default scanner.
// THIS IS ONLY MEANT FOR TEST CODE, NOT PRODUCTION CODE.
func TestingOnlyScanner() Scanner {
	testScanner := scanner{
		coverageThreshold: coverageThreshold,
		scanner:           licensecheck.Scan,
	}
	return &testScanner
}
// IdentifyLicenseIDs reads the whole reader, runs the configured scan function over the bytes,
// and returns the ID of every match. It returns (nil, nil) when no scan function is configured
// or when the reported coverage percentage is below the scanner's threshold; a read failure is
// returned as the error. The context argument is not used.
func (s scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]string, error) {
	scan := s.scanner
	if scan == nil {
		// no usable scan function: report nothing rather than fail
		return nil, nil
	}

	raw, err := io.ReadAll(reader)
	if err != nil {
		return nil, err
	}

	coverage := scan(raw)
	if coverage.Percent < s.coverageThreshold {
		// unknown or no licenses here?
		return nil, nil
	}

	// start from a nil slice so that zero matches still yields a nil result
	var ids []string
	for i := range coverage.Match {
		ids = append(ids, coverage.Match[i].ID)
	}
	return ids, nil
}

View File

@ -0,0 +1,28 @@
package licenses
import (
"context"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
)
// Search asks the given scanner to identify license IDs in the reader's content and converts
// each ID into a license.Concluded pkg.License associated with the reader's location.
// A scanner error is returned as-is with a nil slice; when no IDs are identified the result is
// an empty, non-nil slice.
func Search(ctx context.Context, scanner Scanner, reader file.LocationReadCloser) (licenses []pkg.License, err error) {
	ids, err := scanner.IdentifyLicenseIDs(ctx, reader)
	if err != nil {
		return nil, err
	}

	licenses = make([]pkg.License, 0, len(ids))
	for i := range ids {
		found := pkg.NewLicenseFromLocations(ids[i], reader.Location)
		found.Type = license.Concluded
		licenses = append(licenses, found)
	}
	return licenses, nil
}

View File

@ -9,6 +9,7 @@ import (
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/internal/task"
"github.com/anchore/syft/syft/artifact"
@ -60,6 +61,9 @@ func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) (
},
}
// inject a single license scanner for all package cataloging tasks into context
ctx = licenses.SetContextLicenseScanner(ctx, licenses.NewDefaultScanner())
catalogingProgress := monitorCatalogingTask(src.ID(), taskGroups)
packageCatalogingProgress := monitorPackageCatalogingTask()

View File

@ -154,7 +154,7 @@ func (c *Cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.
var relationships []artifact.Relationship
var errs error
logger := log.Nested("cataloger", c.upstreamCataloger)
lgr := log.Nested("cataloger", c.upstreamCataloger)
env := Environment{
// TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later.
@ -166,7 +166,7 @@ func (c *Cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.
log.WithFields("path", location.RealPath).Trace("parsing file contents")
discoveredPackages, discoveredRelationships, err := invokeParser(ctx, resolver, location, logger, parser, &env)
discoveredPackages, discoveredRelationships, err := invokeParser(ctx, resolver, location, lgr, parser, &env)
if err != nil {
// parsers may return errors and valid packages / relationships
errs = unknown.Append(errs, location, err)

View File

@ -3,6 +3,7 @@ package golang
import (
"archive/zip"
"bytes"
"context"
"fmt"
"io"
"io/fs"
@ -79,9 +80,9 @@ func remotesForModule(proxies []string, noProxy []string, module string) []strin
return proxies
}
func (c *goLicenseResolver) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) ([]pkg.License, error) {
func (c *goLicenseResolver) getLicenses(ctx context.Context, scanner licenses.Scanner, resolver file.Resolver, moduleName, moduleVersion string) ([]pkg.License, error) {
// search the scan target first, ignoring local and remote sources
goLicenses, err := c.findLicensesInSource(resolver,
goLicenses, err := c.findLicensesInSource(ctx, scanner, resolver,
fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
)
if err != nil || len(goLicenses) > 0 {
@ -90,7 +91,7 @@ func (c *goLicenseResolver) getLicenses(resolver file.Resolver, moduleName, modu
// look in the local host mod directory...
if c.opts.SearchLocalModCacheLicenses {
goLicenses, err = c.getLicensesFromLocal(moduleName, moduleVersion)
goLicenses, err = c.getLicensesFromLocal(ctx, scanner, moduleName, moduleVersion)
if err != nil || len(goLicenses) > 0 {
return toPkgLicenses(goLicenses), err
}
@ -98,13 +99,13 @@ func (c *goLicenseResolver) getLicenses(resolver file.Resolver, moduleName, modu
// download from remote sources
if c.opts.SearchRemoteLicenses {
goLicenses, err = c.getLicensesFromRemote(moduleName, moduleVersion)
goLicenses, err = c.getLicensesFromRemote(ctx, scanner, moduleName, moduleVersion)
}
return toPkgLicenses(goLicenses), err
}
func (c *goLicenseResolver) getLicensesFromLocal(moduleName, moduleVersion string) ([]goLicense, error) {
func (c *goLicenseResolver) getLicensesFromLocal(ctx context.Context, scanner licenses.Scanner, moduleName, moduleVersion string) ([]goLicense, error) {
if c.localModCacheDir == nil {
return nil, nil
}
@ -120,10 +121,10 @@ func (c *goLicenseResolver) getLicensesFromLocal(moduleName, moduleVersion strin
// if we're running against a directory on the filesystem, it may not include the
// user's homedir / GOPATH, so we defer to using the localModCacheResolver
// we use $GOPATH/pkg/mod to avoid leaking information about the user's system
return c.findLicensesInFS("file://$GOPATH/pkg/mod/"+subdir+"/", dir)
return c.findLicensesInFS(ctx, scanner, "file://$GOPATH/pkg/mod/"+subdir+"/", dir)
}
func (c *goLicenseResolver) getLicensesFromRemote(moduleName, moduleVersion string) ([]goLicense, error) {
func (c *goLicenseResolver) getLicensesFromRemote(ctx context.Context, scanner licenses.Scanner, moduleName, moduleVersion string) ([]goLicense, error) {
return c.licenseCache.Resolve(fmt.Sprintf("%s/%s", moduleName, moduleVersion), func() ([]goLicense, error) {
proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName)
@ -132,11 +133,11 @@ func (c *goLicenseResolver) getLicensesFromRemote(moduleName, moduleVersion stri
return nil, err
}
return c.findLicensesInFS(urlPrefix, fsys)
return c.findLicensesInFS(ctx, scanner, urlPrefix, fsys)
})
}
func (c *goLicenseResolver) findLicensesInFS(urlPrefix string, fsys fs.FS) ([]goLicense, error) {
func (c *goLicenseResolver) findLicensesInFS(ctx context.Context, scanner licenses.Scanner, urlPrefix string, fsys fs.FS) ([]goLicense, error) {
var out []goLicense
err := fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error {
if err != nil {
@ -156,7 +157,8 @@ func (c *goLicenseResolver) findLicensesInFS(urlPrefix string, fsys fs.FS) ([]go
return nil
}
defer internal.CloseAndLogError(rdr, filePath)
parsed, err := licenses.Parse(rdr, file.NewLocation(filePath))
parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(file.NewLocation(filePath), rdr))
if err != nil {
log.Debugf("error parsing license file %s: %v", filePath, err)
return nil
@ -174,7 +176,7 @@ func (c *goLicenseResolver) findLicensesInFS(urlPrefix string, fsys fs.FS) ([]go
return out, err
}
func (c *goLicenseResolver) findLicensesInSource(resolver file.Resolver, globMatch string) ([]goLicense, error) {
func (c *goLicenseResolver) findLicensesInSource(ctx context.Context, scanner licenses.Scanner, resolver file.Resolver, globMatch string) ([]goLicense, error) {
var out []goLicense
locations, err := resolver.FilesByGlob(globMatch)
if err != nil {
@ -182,7 +184,7 @@ func (c *goLicenseResolver) findLicensesInSource(resolver file.Resolver, globMat
}
for _, l := range locations {
parsed, err := c.parseLicenseFromLocation(l, resolver)
parsed, err := c.parseLicenseFromLocation(ctx, scanner, l, resolver)
if err != nil {
return nil, err
}
@ -200,7 +202,7 @@ func (c *goLicenseResolver) findLicensesInSource(resolver file.Resolver, globMat
return out, nil
}
func (c *goLicenseResolver) parseLicenseFromLocation(l file.Location, resolver file.Resolver) ([]goLicense, error) {
func (c *goLicenseResolver) parseLicenseFromLocation(ctx context.Context, scanner licenses.Scanner, l file.Location, resolver file.Resolver) ([]goLicense, error) {
var out []goLicense
fileName := path.Base(l.RealPath)
if c.lowerLicenseFileNames.Has(strings.ToLower(fileName)) {
@ -209,7 +211,7 @@ func (c *goLicenseResolver) parseLicenseFromLocation(l file.Location, resolver f
return nil, err
}
defer internal.CloseAndLogError(contents, l.RealPath)
parsed, err := licenses.Parse(contents, l)
parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(l, contents))
if err != nil {
return nil, err
}

View File

@ -3,6 +3,7 @@ package golang
import (
"archive/zip"
"bytes"
"context"
"fmt"
"io/fs"
"net/http"
@ -15,6 +16,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/license"
@ -26,6 +28,8 @@ func Test_LocalLicenseSearch(t *testing.T) {
loc2 := file.NewLocation("github.com/!cap!o!r!g/!cap!project@v4.111.5/LICENSE.txt")
loc3 := file.NewLocation("github.com/someorg/strangelicense@v1.2.3/LiCeNsE.tXt")
licenseScanner := licenses.TestingOnlyScanner()
tests := []struct {
name string
version string
@ -78,12 +82,12 @@ func Test_LocalLicenseSearch(t *testing.T) {
LocalModCacheDir: filepath.Join(wd, "test-fixtures", "licenses", "pkg", "mod"),
},
)
licenses, err := l.getLicenses(fileresolver.Empty{}, test.name, test.version)
lics, err := l.getLicenses(context.Background(), licenseScanner, fileresolver.Empty{}, test.name, test.version)
require.NoError(t, err)
require.Len(t, licenses, 1)
require.Len(t, lics, 1)
require.Equal(t, test.expected, licenses[0])
require.Equal(t, test.expected, lics[0])
})
}
}
@ -92,6 +96,8 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) {
loc1 := file.NewLocation("github.com/someorg/somename@v0.3.2/LICENSE")
loc2 := file.NewLocation("github.com/!cap!o!r!g/!cap!project@v4.111.5/LICENSE.txt")
licenseScanner := licenses.TestingOnlyScanner()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
buf := &bytes.Buffer{}
uri := strings.TrimPrefix(strings.TrimSuffix(r.RequestURI, ".zip"), "/")
@ -169,12 +175,12 @@ func Test_RemoteProxyLicenseSearch(t *testing.T) {
},
)
licenses, err := l.getLicenses(fileresolver.Empty{}, test.name, test.version)
lics, err := l.getLicenses(context.Background(), licenseScanner, fileresolver.Empty{}, test.name, test.version)
require.NoError(t, err)
require.Len(t, licenses, 1)
require.Len(t, lics, 1)
require.Equal(t, test.expected, licenses[0])
require.Equal(t, test.expected, lics[0])
})
}
}
@ -248,7 +254,7 @@ func Test_findVersionPath(t *testing.T) {
func Test_walkDirErrors(t *testing.T) {
resolver := newGoLicenseResolver("", CatalogerConfig{})
_, err := resolver.findLicensesInFS("somewhere", badFS{})
_, err := resolver.findLicensesInFS(context.Background(), licenses.TestingOnlyScanner(), "somewhere", badFS{})
require.Error(t, err)
}
@ -266,6 +272,8 @@ func Test_noLocalGoModDir(t *testing.T) {
validTmp := t.TempDir()
require.NoError(t, os.MkdirAll(filepath.Join(validTmp, "mod@ver"), 0700|os.ModeDir))
licenseScanner := licenses.TestingOnlyScanner()
tests := []struct {
name string
dir string
@ -299,7 +307,7 @@ func Test_noLocalGoModDir(t *testing.T) {
SearchLocalModCacheLicenses: true,
LocalModCacheDir: test.dir,
})
_, err := resolver.getLicensesFromLocal("mod", "ver")
_, err := resolver.getLicensesFromLocal(context.Background(), licenseScanner, "mod", "ver")
test.wantErr(t, err)
})
}

View File

@ -5,21 +5,15 @@ import (
"strings"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func (c *goBinaryCataloger) newGoBinaryPackage(resolver file.Resolver, dep *debug.Module, mainModule, goVersion, architecture string, buildSettings pkg.KeyValues, cryptoSettings, experiments []string, locations ...file.Location) pkg.Package {
func (c *goBinaryCataloger) newGoBinaryPackage(dep *debug.Module, mainModule, goVersion, architecture string, buildSettings pkg.KeyValues, cryptoSettings, experiments []string, licenses []pkg.License, locations ...file.Location) pkg.Package {
if dep.Replace != nil {
dep = dep.Replace
}
licenses, err := c.licenseResolver.getLicenses(resolver, dep.Path, dep.Version)
if err != nil {
log.Tracef("error getting licenses for golang package: %s %v", dep.Path, err)
}
p := pkg.Package{
Name: dep.Path,
Version: dep.Version,

View File

@ -18,6 +18,7 @@ import (
"golang.org/x/mod/module"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -59,9 +60,11 @@ func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
}
// parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler.
func (c *goBinaryCataloger) parseGoBinary(_ context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package
licenseScanner := licenses.ContextLicenseScanner(ctx)
unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
if err != nil {
return nil, nil, err
@ -73,7 +76,7 @@ func (c *goBinaryCataloger) parseGoBinary(_ context.Context, resolver file.Resol
var rels []artifact.Relationship
for _, mod := range mods {
var depPkgs []pkg.Package
mainPkg, depPkgs := c.buildGoPkgInfo(resolver, reader.Location, mod, mod.arch, unionReader)
mainPkg, depPkgs := c.buildGoPkgInfo(ctx, licenseScanner, resolver, reader.Location, mod, mod.arch, unionReader)
if mainPkg != nil {
rels = createModuleRelationships(*mainPkg, depPkgs)
pkgs = append(pkgs, *mainPkg)
@ -101,7 +104,7 @@ func createModuleRelationships(main pkg.Package, deps []pkg.Package) []artifact.
var emptyModule debug.Module
var moduleFromPartialPackageBuild = debug.Module{Path: "command-line-arguments"}
func (c *goBinaryCataloger) buildGoPkgInfo(resolver file.Resolver, location file.Location, mod *extendedBuildInfo, arch string, reader io.ReadSeekCloser) (*pkg.Package, []pkg.Package) {
func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, licenseScanner licenses.Scanner, resolver file.Resolver, location file.Location, mod *extendedBuildInfo, arch string, reader io.ReadSeekCloser) (*pkg.Package, []pkg.Package) {
if mod == nil {
return nil, nil
}
@ -116,9 +119,13 @@ func (c *goBinaryCataloger) buildGoPkgInfo(resolver file.Resolver, location file
continue
}
lics, err := c.licenseResolver.getLicenses(ctx, licenseScanner, resolver, dep.Path, dep.Version)
if err != nil {
log.Tracef("error getting licenses for golang package: %s %v", dep.Path, err)
}
gover, experiments := getExperimentsFromVersion(mod.GoVersion)
p := c.newGoBinaryPackage(
resolver,
dep,
mod.Main.Path,
gover,
@ -126,6 +133,7 @@ func (c *goBinaryCataloger) buildGoPkgInfo(resolver file.Resolver, location file
nil,
mod.cryptoSettings,
experiments,
lics,
location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)
if pkg.IsValid(&p) {
@ -137,7 +145,7 @@ func (c *goBinaryCataloger) buildGoPkgInfo(resolver file.Resolver, location file
return nil, pkgs
}
main := c.makeGoMainPackage(resolver, mod, arch, location, reader)
main := c.makeGoMainPackage(ctx, licenseScanner, resolver, mod, arch, location, reader)
return &main, pkgs
}
@ -152,11 +160,16 @@ func missingMainModule(mod *extendedBuildInfo) bool {
return mod.Main == moduleFromPartialPackageBuild
}
func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser) pkg.Package {
func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, licenseScanner licenses.Scanner, resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser) pkg.Package {
gbs := getBuildSettings(mod.Settings)
lics, err := c.licenseResolver.getLicenses(ctx, licenseScanner, resolver, mod.Main.Path, mod.Main.Version)
if err != nil {
log.Tracef("error getting licenses for golang package: %s %v", mod.Main.Path, err)
}
gover, experiments := getExperimentsFromVersion(mod.GoVersion)
main := c.newGoBinaryPackage(
resolver,
&mod.Main,
mod.Main.Path,
gover,
@ -164,6 +177,7 @@ func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *exten
gbs,
mod.cryptoSettings,
experiments,
lics,
location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)

View File

@ -3,6 +3,7 @@ package golang
import (
"bufio"
"bytes"
"context"
"errors"
"io"
"os"
@ -17,6 +18,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/internal/unionreader"
@ -167,6 +169,8 @@ func TestBuildGoPkgInfo(t *testing.T) {
},
}
licenseScanner := licenses.TestingOnlyScanner()
tests := []struct {
name string
mod *extendedBuildInfo
@ -1053,7 +1057,7 @@ func TestBuildGoPkgInfo(t *testing.T) {
c := newGoBinaryCataloger(DefaultCatalogerConfig())
reader, err := unionreader.GetUnionReader(io.NopCloser(strings.NewReader(test.binaryContent)))
require.NoError(t, err)
mainPkg, pkgs := c.buildGoPkgInfo(fileresolver.Empty{}, location, test.mod, test.mod.arch, reader)
mainPkg, pkgs := c.buildGoPkgInfo(context.Background(), licenseScanner, fileresolver.Empty{}, location, test.mod, test.mod.arch, reader)
if mainPkg != nil {
pkgs = append(pkgs, *mainPkg)
}

View File

@ -11,6 +11,7 @@ import (
"golang.org/x/mod/modfile"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -31,9 +32,11 @@ func newGoModCataloger(opts CatalogerConfig) *goModCataloger {
// parseGoModFile takes a go.mod and lists all packages discovered.
//
//nolint:funlen
func (c *goModCataloger) parseGoModFile(_ context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func (c *goModCataloger) parseGoModFile(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
packages := make(map[string]pkg.Package)
licenseScanner := licenses.ContextLicenseScanner(ctx)
contents, err := io.ReadAll(reader)
if err != nil {
return nil, nil, fmt.Errorf("failed to read go module: %w", err)
@ -50,7 +53,7 @@ func (c *goModCataloger) parseGoModFile(_ context.Context, resolver file.Resolve
}
for _, m := range f.Require {
licenses, err := c.licenseResolver.getLicenses(resolver, m.Mod.Path, m.Mod.Version)
lics, err := c.licenseResolver.getLicenses(ctx, licenseScanner, resolver, m.Mod.Path, m.Mod.Version)
if err != nil {
log.Tracef("error getting licenses for package: %s %v", m.Mod.Path, err)
}
@ -58,7 +61,7 @@ func (c *goModCataloger) parseGoModFile(_ context.Context, resolver file.Resolve
packages[m.Mod.Path] = pkg.Package{
Name: m.Mod.Path,
Version: m.Mod.Version,
Licenses: pkg.NewLicenseSet(licenses...),
Licenses: pkg.NewLicenseSet(lics...),
Locations: file.NewLocationSet(reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
PURL: packageURL(m.Mod.Path, m.Mod.Version),
Language: pkg.Go,
@ -71,7 +74,7 @@ func (c *goModCataloger) parseGoModFile(_ context.Context, resolver file.Resolve
// remove any old packages and replace with new ones...
for _, m := range f.Replace {
licenses, err := c.licenseResolver.getLicenses(resolver, m.New.Path, m.New.Version)
lics, err := c.licenseResolver.getLicenses(ctx, licenseScanner, resolver, m.New.Path, m.New.Version)
if err != nil {
log.Tracef("error getting licenses for package: %s %v", m.New.Path, err)
}
@ -83,7 +86,7 @@ func (c *goModCataloger) parseGoModFile(_ context.Context, resolver file.Resolve
packages[m.New.Path] = pkg.Package{
Name: m.New.Path,
Version: m.New.Version,
Licenses: pkg.NewLicenseSet(licenses...),
Licenses: pkg.NewLicenseSet(lics...),
Locations: file.NewLocationSet(reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
PURL: packageURL(m.New.Path, m.New.Version),
Language: pkg.Go,

View File

@ -4,6 +4,7 @@ import (
"context"
"crypto"
"fmt"
"io"
"os"
"path"
"slices"
@ -49,14 +50,15 @@ var javaArchiveHashes = []crypto.Hash{
}
type archiveParser struct {
fileManifest intFile.ZipFileManifest
location file.Location
archivePath string
contentPath string
fileInfo archiveFilename
detectNested bool
cfg ArchiveCatalogerConfig
maven *mavenResolver
fileManifest intFile.ZipFileManifest
location file.Location
archivePath string
contentPath string
fileInfo archiveFilename
detectNested bool
cfg ArchiveCatalogerConfig
maven *mavenResolver
licenseScanner licenses.Scanner
}
type genericArchiveParserAdapter struct {
@ -69,7 +71,7 @@ func newGenericArchiveParserAdapter(cfg ArchiveCatalogerConfig) genericArchivePa
// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {
@ -88,7 +90,9 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {
// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
// and parse nested archives or ignore them.
func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
licenseScanner := licenses.ContextLicenseScanner(ctx)
// fetch the last element of the virtual path
virtualElements := strings.Split(reader.Path(), ":")
currentFilepath := virtualElements[len(virtualElements)-1]
@ -104,14 +108,15 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg
}
return &archiveParser{
fileManifest: fileManifest,
location: reader.Location,
archivePath: archivePath,
contentPath: contentPath,
fileInfo: newJavaArchiveFilename(currentFilepath),
detectNested: detectNested,
cfg: cfg,
maven: newMavenResolver(nil, cfg),
fileManifest: fileManifest,
location: reader.Location,
archivePath: archivePath,
contentPath: contentPath,
fileInfo: newJavaArchiveFilename(currentFilepath),
detectNested: detectNested,
cfg: cfg,
maven: newMavenResolver(nil, cfg),
licenseScanner: licenseScanner,
}, cleanupFn, nil
}
@ -220,7 +225,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
return nil, err
}
name, version, licenses, err := j.discoverNameVersionLicense(ctx, manifest)
name, version, lics, err := j.discoverNameVersionLicense(ctx, manifest)
if err != nil {
return nil, err
}
@ -230,7 +235,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
Name: name,
Version: version,
Language: pkg.Java,
Licenses: pkg.NewLicenseSet(licenses...),
Licenses: pkg.NewLicenseSet(lics...),
Locations: file.NewLocationSet(
j.location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),
@ -246,7 +251,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, error) {
// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest
// TODO: when we support locations of paths within archives we should start passing the specific manifest location object instead of the top jar
licenses := pkg.NewLicensesFromLocation(j.location, selectLicenses(manifest)...)
lics := pkg.NewLicensesFromLocation(j.location, selectLicenses(manifest)...)
/*
We should name and version from, in this order:
1. pom.properties if we find exactly 1
@ -262,25 +267,25 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
version = selectVersion(manifest, j.fileInfo)
}
if len(licenses) == 0 {
fileLicenses, err := j.getLicenseFromFileInArchive()
if len(lics) == 0 {
fileLicenses, err := j.getLicenseFromFileInArchive(ctx)
if err != nil {
return "", "", nil, err
}
if fileLicenses != nil {
licenses = append(licenses, fileLicenses...)
lics = append(lics, fileLicenses...)
}
}
// If we didn't find any licenses in the archive so far, we'll try again in Maven Central using groupIDFromJavaMetadata
if len(licenses) == 0 {
if len(lics) == 0 {
// Today we don't have a way to distinguish between licenses from the manifest and licenses from the pom.xml
// until the file.Location object can support sub-paths (i.e. paths within archives, recursively; issue https://github.com/anchore/syft/issues/2211).
// Until then it's less confusing to use the licenses from the pom.xml only if the manifest did not list any.
licenses = j.findLicenseFromJavaMetadata(ctx, groupID, artifactID, version, parsedPom, manifest)
lics = j.findLicenseFromJavaMetadata(ctx, groupID, artifactID, version, parsedPom, manifest)
}
return artifactID, version, licenses, nil
return artifactID, version, lics, nil
}
// findLicenseFromJavaMetadata attempts to find license information from all available maven metadata properties and pom info
@ -446,7 +451,7 @@ func getDigestsFromArchive(archivePath string) ([]file.Digest, error) {
return digests, nil
}
func (j *archiveParser) getLicenseFromFileInArchive() ([]pkg.License, error) {
func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.License, error) {
var fileLicenses []pkg.License
for _, filename := range licenses.FileNames() {
licenseMatches := j.fileManifest.GlobMatch(true, "/META-INF/"+filename)
@ -463,7 +468,8 @@ func (j *archiveParser) getLicenseFromFileInArchive() ([]pkg.License, error) {
for _, licenseMatch := range licenseMatches {
licenseContents := contents[licenseMatch]
parsed, err := licenses.Parse(strings.NewReader(licenseContents), j.location)
r := strings.NewReader(licenseContents)
parsed, err := licenses.Search(ctx, j.licenseScanner, file.NewLocationReadCloser(j.location, io.NopCloser(r)))
if err != nil {
return nil, err
}

View File

@ -20,6 +20,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/vifraa/gopom"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
@ -30,6 +31,8 @@ import (
func TestSearchMavenForLicenses(t *testing.T) {
url := mockMavenRepo(t)
ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.TestingOnlyScanner())
tests := []struct {
name string
fixture string
@ -71,6 +74,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
// setup parser
ap, cleanupFn, err := newJavaArchiveParser(
ctx,
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -86,6 +90,8 @@ func TestSearchMavenForLicenses(t *testing.T) {
}
func TestParseJar(t *testing.T) {
ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.TestingOnlyScanner())
tests := []struct {
name string
fixture string
@ -347,10 +353,12 @@ func TestParseJar(t *testing.T) {
UseNetwork: false,
UseMavenLocalRepository: false,
}
parser, cleanupFn, err := newJavaArchiveParser(file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
}, false, cfg)
parser, cleanupFn, err := newJavaArchiveParser(
ctx,
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
}, false, cfg)
defer cleanupFn()
require.NoError(t, err)
@ -1352,6 +1360,8 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
}
func Test_deterministicMatchingPomProperties(t *testing.T) {
ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.TestingOnlyScanner())
tests := []struct {
fixture string
expected mavenID
@ -1371,10 +1381,12 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
fixture, err := os.Open(fixturePath)
require.NoError(t, err)
parser, cleanupFn, err := newJavaArchiveParser(file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
}, false, ArchiveCatalogerConfig{UseNetwork: false})
parser, cleanupFn, err := newJavaArchiveParser(
ctx,
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
}, false, ArchiveCatalogerConfig{UseNetwork: false})
defer cleanupFn()
require.NoError(t, err)

View File

@ -1,6 +1,7 @@
package python
import (
"context"
"fmt"
"regexp"
"strings"
@ -72,7 +73,26 @@ func newPackageForRequirementsWithMetadata(name, version string, metadata pkg.Py
return p
}
func newPackageForPackage(resolver file.Resolver, m parsedData, sources ...file.Location) pkg.Package {
func newPackageForPackage(m parsedData, licenses pkg.LicenseSet, sources ...file.Location) pkg.Package {
name := normalize(m.Name)
p := pkg.Package{
Name: name,
Version: m.Version,
PURL: packageURL(name, m.Version, &m.PythonPackage),
Locations: file.NewLocationSet(sources...),
Licenses: licenses,
Language: pkg.Python,
Type: pkg.PythonPkg,
Metadata: m.PythonPackage,
}
p.SetID()
return p
}
func findLicenses(ctx context.Context, scanner licenses.Scanner, resolver file.Resolver, m parsedData) pkg.LicenseSet {
var licenseSet pkg.LicenseSet
switch {
@ -89,7 +109,7 @@ func newPackageForPackage(resolver file.Resolver, m parsedData, sources ...file.
if len(found) > 0 {
metadataContents, err := resolver.FileContentsByLocation(found[0])
if err == nil {
parsed, err := licenses.Parse(metadataContents, m.LicenseLocation)
parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(m.LicenseLocation, metadataContents))
if err != nil {
log.WithFields("error", err).Tracef("unable to parse a license from the file in %s", m.LicenseLocation.Path())
}
@ -101,23 +121,7 @@ func newPackageForPackage(resolver file.Resolver, m parsedData, sources ...file.
}
}
}
name := normalize(m.Name)
p := pkg.Package{
Name: name,
Version: m.Version,
PURL: packageURL(name, m.Version, &m.PythonPackage),
Locations: file.NewLocationSet(sources...),
Licenses: licenseSet,
Language: pkg.Python,
Type: pkg.PythonPkg,
Metadata: m.PythonPackage,
}
p.SetID()
return p
return licenseSet
}
func packageURL(name, version string, m *pkg.PythonPackage) string {

View File

@ -9,6 +9,7 @@ import (
"path/filepath"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -18,7 +19,9 @@ import (
// parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents. Contained
// fields are governed by the PyPA core metadata specification (https://packaging.python.org/en/latest/specifications/core-metadata/).
func parseWheelOrEgg(_ context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parseWheelOrEgg(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
licenseScanner := licenses.ContextLicenseScanner(ctx)
pd, sources, err := assembleEggOrWheelMetadata(resolver, reader.Location)
if err != nil {
return nil, nil, err
@ -33,7 +36,13 @@ func parseWheelOrEgg(_ context.Context, resolver file.Resolver, _ *generic.Envir
return nil, nil, nil
}
pkgs := []pkg.Package{newPackageForPackage(resolver, *pd, sources...)}
pkgs := []pkg.Package{
newPackageForPackage(
*pd,
findLicenses(ctx, licenseScanner, resolver, *pd),
sources...,
),
}
return pkgs, nil, nil
}
@ -60,7 +69,7 @@ func fetchInstalledFiles(resolver file.Resolver, metadataLocation file.Location,
// parse the installed-files contents
installedFiles, err := parseInstalledFiles(installedFilesContents, metadataLocation.RealPath, sitePackagesRootPath)
if err != nil {
log.Warnf("unable to parse installed-files.txt for python package=%+v: %w", metadataLocation.RealPath, err)
log.WithFields("error", err, "path", metadataLocation.RealPath).Trace("unable to parse installed-files.txt for python package")
return files, sources, nil
}