feat: license file search (#4327)

Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Keith Zantow 2025-11-03 14:16:05 -05:00 committed by GitHub
parent 7c154e7c37
commit a400c675fc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
32 changed files with 616 additions and 210 deletions

View File

@ -1,9 +1,12 @@
package dart
import (
"context"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
func newPubspecLockPackage(name string, raw pubspecLockPackage, locations ...file.Location) pkg.Package {
@ -29,7 +32,7 @@ func newPubspecLockPackage(name string, raw pubspecLockPackage, locations ...fil
return p
}
func newPubspecPackage(raw pubspecPackage, locations ...file.Location) pkg.Package {
func newPubspecPackage(ctx context.Context, resolver file.Resolver, raw pubspecPackage, locations ...file.Location) pkg.Package {
var env *pkg.DartPubspecEnvironment
if raw.Environment.SDK != "" || raw.Environment.Flutter != "" {
// this is required only after pubspec v2, but might have been optional before this
@ -58,6 +61,8 @@ func newPubspecPackage(raw pubspecPackage, locations ...file.Location) pkg.Packa
p.SetID()
p = licenses.RelativeToPackage(ctx, resolver, p)
return p
}

View File

@ -29,7 +29,7 @@ type dartPubspecEnvironment struct {
Flutter string `mapstructure:"flutter" yaml:"flutter"`
}
func parsePubspec(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parsePubspec(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package
dec := yaml.NewDecoder(reader)
@ -41,6 +41,8 @@ func parsePubspec(_ context.Context, _ file.Resolver, _ *generic.Environment, re
pkgs = append(pkgs,
newPubspecPackage(
ctx,
resolver,
p,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),

View File

@ -4,18 +4,20 @@ import (
"context"
"fmt"
"path/filepath"
"regexp"
"strings"
"github.com/spf13/afero"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
// resolveModuleLicenses finds and parses license files for Go modules
func resolveModuleLicenses(ctx context.Context, pkgInfos []pkgInfo, fs afero.Fs) pkg.LicenseSet {
licenses := pkg.NewLicenseSet()
func resolveModuleLicenses(ctx context.Context, scanRoot string, pkgInfos []pkgInfo, fs afero.Fs) pkg.LicenseSet {
out := pkg.NewLicenseSet()
for _, info := range pkgInfos {
modDir, pkgDir, err := getAbsolutePkgPaths(info)
@ -23,22 +25,32 @@ func resolveModuleLicenses(ctx context.Context, pkgInfos []pkgInfo, fs afero.Fs)
continue
}
licenseFiles, err := findAllLicenseCandidatesUpwards(pkgDir, licenseRegexp, modDir, fs)
licenseFiles, err := findAllLicenseCandidatesUpwards(pkgDir, modDir, fs)
if err != nil {
continue
}
for _, f := range licenseFiles {
contents, err := fs.Open(f)
if err != nil {
continue
}
licenses.Add(pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(file.Location{}, contents))...)
_ = contents.Close()
out.Add(readLicenses(ctx, scanRoot, fs, f)...)
}
}
return licenses
return out
}
// readLicenses opens the license file at path f on fs and returns the licenses
// detected in its contents. A file that cannot be opened is logged at debug
// level and yields nil.
//
// When scanRoot is non-empty and f lies inside it, the returned licenses carry a
// location built from f with the scanRoot prefix removed; otherwise they carry an
// empty location.
func readLicenses(ctx context.Context, scanRoot string, fs afero.Fs, f string) []pkg.License {
	contents, err := fs.Open(f)
	if err != nil {
		log.WithFields("file", f, "error", err).Debug("unable to read license file")
		return nil
	}
	defer internal.CloseAndLogError(contents, f)

	location := file.Location{}
	if scanRoot != "" && strings.HasPrefix(f, scanRoot) {
		rel := strings.TrimPrefix(f, scanRoot)
		sep := string(filepath.Separator)
		// a bare string prefix also matches sibling directories (scanRoot "/src/app" vs
		// "/src/app-old/LICENSE"), so require the match to end on a path boundary
		if strings.HasSuffix(scanRoot, sep) || strings.HasPrefix(rel, sep) {
			// include location when licenses are found within the scan target
			location = file.NewLocation(rel)
		}
	}
	return pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(location, contents))
}
/*
@ -60,7 +72,7 @@ When we should consider redesign tip to stem:
- We need to consider the case here where nested modules are visited by accident and licenses
are erroneously associated to a 'parent module'; bubble up currently prevents this
*/
func findAllLicenseCandidatesUpwards(dir string, r *regexp.Regexp, stopAt string, fs afero.Fs) ([]string, error) {
func findAllLicenseCandidatesUpwards(dir string, stopAt string, fs afero.Fs) ([]string, error) {
// Validate that both paths are absolute
if !filepath.IsAbs(dir) {
return nil, fmt.Errorf("dir must be an absolute path, got: %s", dir)
@ -69,25 +81,16 @@ func findAllLicenseCandidatesUpwards(dir string, r *regexp.Regexp, stopAt string
return nil, fmt.Errorf("stopAt must be an absolute path, got: %s", stopAt)
}
licenses, err := findLicenseCandidates(dir, r, stopAt, fs)
if err != nil {
return nil, err
return findLicenseCandidates(dir, stopAt, fs)
}
// Ensure we return an empty slice rather than nil for consistency
if licenses == nil {
return []string{}, nil
}
return licenses, nil
}
func findLicenseCandidates(dir string, r *regexp.Regexp, stopAt string, fs afero.Fs) ([]string, error) {
func findLicenseCandidates(dir string, stopAt string, fs afero.Fs) ([]string, error) {
// stop if we've gone outside the stopAt directory
if !strings.HasPrefix(dir, stopAt) {
return []string{}, nil
}
licenses, err := findLicensesInDir(dir, r, fs)
out, err := findLicensesInDir(dir, fs)
if err != nil {
return nil, err
}
@ -95,17 +98,17 @@ func findLicenseCandidates(dir string, r *regexp.Regexp, stopAt string, fs afero
parent := filepath.Dir(dir)
// can't go any higher up the directory tree: "/" case
if parent == dir {
return licenses, nil
return out, nil
}
// search parent directory and combine results
parentLicenses, err := findLicenseCandidates(parent, r, stopAt, fs)
parentLicenses, err := findLicenseCandidates(parent, stopAt, fs)
if err != nil {
return nil, err
}
// Combine current directory licenses with parent directory licenses
return append(licenses, parentLicenses...), nil
return append(out, parentLicenses...), nil
}
func getAbsolutePkgPaths(info pkgInfo) (modDir string, pkgDir string, err error) {
@ -126,8 +129,8 @@ func getAbsolutePkgPaths(info pkgInfo) (modDir string, pkgDir string, err error)
return modDir, pkgDir, nil
}
func findLicensesInDir(dir string, r *regexp.Regexp, fs afero.Fs) ([]string, error) {
var licenses []string
func findLicensesInDir(dir string, fs afero.Fs) ([]string, error) {
var out []string
dirContents, err := afero.ReadDir(fs, dir)
if err != nil {
@ -139,11 +142,11 @@ func findLicensesInDir(dir string, r *regexp.Regexp, fs afero.Fs) ([]string, err
continue
}
if r.MatchString(f.Name()) {
if licenses.IsLicenseFile(f.Name()) {
path := filepath.Join(dir, f.Name())
licenses = append(licenses, path)
out = append(out, path)
}
}
return licenses, nil
return out, nil
}

View File

@ -70,8 +70,8 @@ func TestFindAllLicenseCandidatesUpwards(t *testing.T) {
fs.MkdirAll("/empty/dir/tree", 0755)
// No license files
},
expectedFiles: []string{},
description: "Should return empty slice when no license files found",
expectedFiles: nil,
description: "Should return nil when no license files found",
},
{
name: "handles directory at filesystem root",
@ -205,7 +205,7 @@ func TestFindAllLicenseCandidatesUpwards(t *testing.T) {
tt.setupFS(fs)
// Run the function
result, err := findAllLicenseCandidatesUpwards(tt.startDir, licenseRegexp, tt.stopAt, fs)
result, err := findAllLicenseCandidatesUpwards(tt.startDir, tt.stopAt, fs)
// Check error expectation
if tt.expectedError {

View File

@ -19,14 +19,13 @@ import (
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/storage/memory"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/cache"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
type goLicenseResolver struct {
@ -35,7 +34,6 @@ type goLicenseResolver struct {
localModCacheDir fs.FS
localVendorDir fs.FS
licenseCache cache.Resolver[[]pkg.License]
lowerLicenseFileNames *strset.Set
}
func newGoLicenseResolver(catalogerName string, opts CatalogerConfig) goLicenseResolver {
@ -64,18 +62,9 @@ func newGoLicenseResolver(catalogerName string, opts CatalogerConfig) goLicenseR
localModCacheDir: localModCacheDir,
localVendorDir: localVendorDir,
licenseCache: cache.GetResolverCachingErrors[[]pkg.License]("golang", "v2"),
lowerLicenseFileNames: strset.New(lowercaseLicenseFiles()...),
}
}
func lowercaseLicenseFiles() []string {
fileNames := licenses.FileNames()
for i := range fileNames {
fileNames[i] = strings.ToLower(fileNames[i])
}
return fileNames
}
func remotesForModule(proxies []string, noProxy []string, module string) []string {
for _, pattern := range noProxy {
if matched, err := path.Match(pattern, module); err == nil && matched {
@ -194,7 +183,7 @@ func (c *goLicenseResolver) findLicensesInFS(ctx context.Context, urlPrefix stri
log.Debugf("nil entry for %s#%s", urlPrefix, filePath)
return nil
}
if !c.lowerLicenseFileNames.Has(strings.ToLower(d.Name())) {
if !licenses.IsLicenseFile(d.Name()) {
return nil
}
rdr, err := fsys.Open(filePath)
@ -203,11 +192,11 @@ func (c *goLicenseResolver) findLicensesInFS(ctx context.Context, urlPrefix stri
return nil
}
defer internal.CloseAndLogError(rdr, filePath)
licenses := pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(file.NewLocation(filePath), rdr))
foundLicenses := pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(file.NewLocation(filePath), rdr))
// since these licenses are found in an external fs.FS, not in the scanned source,
// get rid of the locations but keep information about the where the license was found
// by prepending the urlPrefix to the internal path for an accurate representation
for _, l := range licenses {
for _, l := range foundLicenses {
l.URLs = []string{urlPrefix + filePath}
l.Locations = file.NewLocationSet()
out = append(out, l)
@ -246,7 +235,7 @@ func (c *goLicenseResolver) findLicensesInSource(ctx context.Context, resolver f
func (c *goLicenseResolver) parseLicenseFromLocation(ctx context.Context, l file.Location, resolver file.Resolver) ([]pkg.License, error) {
var out []pkg.License
fileName := path.Base(l.RealPath)
if c.lowerLicenseFileNames.Has(strings.ToLower(fileName)) {
if licenses.IsLicenseFile(fileName) {
contents, err := resolver.FileContentsByLocation(l)
if err != nil {
return nil, err

View File

@ -7,7 +7,7 @@ import (
"go/build"
"io"
"path/filepath"
"regexp"
"slices"
"sort"
"strings"
@ -20,14 +20,11 @@ import (
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
var (
licenseRegexp = regexp.MustCompile(`^(?i)((UN)?LICEN(S|C)E|COPYING|NOTICE).*$`)
)
type goModCataloger struct {
licenseResolver goLicenseResolver
}
@ -46,9 +43,14 @@ func (c *goModCataloger) parseGoModFile(ctx context.Context, resolver file.Resol
log.Debugf("unable to get go.sum: %v", err)
}
scanRoot := ""
if dir, ok := resolver.(*fileresolver.Directory); ok && dir != nil {
scanRoot = dir.Chroot.Base()
}
// source analysis using go toolchain if available
syftSourcePackages, sourceModules, sourceDependencies, unknownErr := c.loadPackages(modDir, reader.Location)
catalogedModules, sourceModuleToPkg := c.catalogModules(ctx, syftSourcePackages, sourceModules, reader, digests)
catalogedModules, sourceModuleToPkg := c.catalogModules(ctx, scanRoot, syftSourcePackages, sourceModules, reader, digests)
relationships := buildModuleRelationships(catalogedModules, sourceDependencies, sourceModuleToPkg)
// base case go.mod file parsing
@ -208,12 +210,16 @@ func (c *goModCataloger) visitPackages(
}
}
}
pkgs[module.Path] = append(pkgs[module.Path], pkgInfo{
info := pkgInfo{
pkgPath: p.PkgPath,
modulePath: module.Path,
pkgDir: pkgDir,
moduleDir: module.Dir,
})
}
if !slices.Contains(pkgs[module.Path], info) { // avoid duplicates
pkgs[module.Path] = append(pkgs[module.Path], info)
}
modules[p.Module.Path] = module
return true
@ -224,6 +230,7 @@ func (c *goModCataloger) visitPackages(
// create syft packages from Go modules found by the go toolchain
func (c *goModCataloger) catalogModules(
ctx context.Context,
scanRoot string,
pkgs map[string][]pkgInfo,
modules map[string]*packages.Module,
reader file.LocationReadCloser,
@ -243,7 +250,7 @@ func (c *goModCataloger) catalogModules(
}
pkgInfos := pkgs[m.Path]
moduleLicenses := resolveModuleLicenses(ctx, pkgInfos, afero.NewOsFs())
moduleLicenses := resolveModuleLicenses(ctx, scanRoot, pkgInfos, afero.NewOsFs())
// we do out of source lookups for module parsing
// locations are NOT included in the SBOM because of this
goModulePkg := pkg.Package{

View File

@ -1,15 +1,22 @@
package homebrew
import (
"context"
"path"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
func newHomebrewPackage(pd parsedHomebrewData, formulaLocation file.Location) pkg.Package {
var licenses []string
func newHomebrewPackage(ctx context.Context, resolver file.Resolver, pd parsedHomebrewData, formulaLocation file.Location) pkg.Package {
var lics []pkg.License
if pd.License != "" {
licenses = append(licenses, pd.License)
lics = append(lics, pkg.NewLicensesFromValues(pd.License)...)
} else {
// sometimes licenses are included in the parent directory
lics = licenses.FindInDirs(ctx, resolver, path.Dir(formulaLocation.Path()))
}
p := pkg.Package{
@ -17,7 +24,7 @@ func newHomebrewPackage(pd parsedHomebrewData, formulaLocation file.Location) pk
Version: pd.Version,
Type: pkg.HomebrewPkg,
Locations: file.NewLocationSet(formulaLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(licenses...)...),
Licenses: pkg.NewLicenseSet(lics...),
FoundBy: "homebrew-cataloger",
PURL: packageURL(pd.Name, pd.Version),
Metadata: pkg.HomebrewFormula{

View File

@ -22,7 +22,7 @@ type parsedHomebrewData struct {
License string
}
func parseHomebrewFormula(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parseHomebrewFormula(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
pd, err := parseFormulaFile(reader)
if err != nil {
log.WithFields("path", reader.RealPath).Trace("failed to parse formula")
@ -35,6 +35,8 @@ func parseHomebrewFormula(_ context.Context, _ file.Resolver, _ *generic.Environ
return []pkg.Package{
newHomebrewPackage(
ctx,
resolver,
*pd,
reader.Location,
),

View File

@ -0,0 +1,123 @@
package licenses
import (
"context"
"path"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
// RelativeToPackage returns p unchanged when it already has licenses. Otherwise it searches
// for license files in the directory of every location on the package that is annotated as
// primary evidence (or carries no evidence annotation at all). When any licenses are found
// they are set on the package and its ID is recomputed.
func RelativeToPackage(ctx context.Context, resolver file.Resolver, p pkg.Package) pkg.Package {
	if !p.Licenses.Empty() {
		// licenses were already found, don't search for more
		return p
	}

	var found []pkg.License
	for _, loc := range p.Locations.ToUnorderedSlice() {
		evidenceType, annotated := loc.Annotations[pkg.EvidenceAnnotationKey]
		if !annotated || evidenceType == pkg.PrimaryEvidenceAnnotation {
			// search for license files relative to any primary evidence on the package
			found = append(found, FindRelativeToLocations(ctx, resolver, loc)...)
		}
	}

	if len(found) == 0 {
		return p
	}

	p.Licenses = pkg.NewLicenseSet(found...)
	p.SetID()
	return p
}
// FindAtLocations creates License objects by reading license files directly at the provided locations
func FindAtLocations(ctx context.Context, resolver file.Resolver, locations ...file.Location) []pkg.License {
	var found []pkg.License
	for i := range locations {
		found = append(found, readFromResolver(ctx, resolver, locations[i])...)
	}
	return found
}
// FindAtPaths creates License objects by resolving each provided path with the resolver and
// reading the license file at every resulting location. Paths that fail to resolve are logged
// at trace level and skipped.
func FindAtPaths(ctx context.Context, resolver file.Resolver, paths ...string) []pkg.License {
	var found []pkg.License
	for _, p := range paths {
		resolved, err := resolver.FilesByPath(p)
		if err != nil {
			log.WithFields("error", err, "path", p).Trace("unable to resolve license path")
			continue
		}
		found = append(found, FindAtLocations(ctx, resolver, resolved...)...)
	}
	return found
}
// FindInDirs creates License objects by searching for known license files directly inside each
// of the provided directories
func FindInDirs(ctx context.Context, resolver file.Resolver, dirs ...string) []pkg.License {
	var found []pkg.License
	for i := range dirs {
		// a single "*" keeps the search to the directory itself
		found = append(found, FindByGlob(ctx, resolver, path.Join(dirs[i], "*"))...)
	}
	return found
}
// FindRelativeToLocations creates License objects by searching for known license files in the
// directory containing each provided location (derived from the location's AccessPath)
func FindRelativeToLocations(ctx context.Context, resolver file.Resolver, locations ...file.Location) []pkg.License {
	var found []pkg.License
	for i := range locations {
		parentDir := path.Dir(locations[i].AccessPath)
		found = append(found, FindInDirs(ctx, resolver, parentDir)...)
	}
	return found
}
// FindByGlob creates License objects by searching for license files with the provided glob.
// Only matches whose base file name satisfies IsLicenseFile are read, so a recursive glob
// such as `<path>/**/*` will only attempt to read the LICENSE-like files it finds.
// A failing glob search is logged at debug level and yields nil.
func FindByGlob(ctx context.Context, resolver file.Resolver, glob string) []pkg.License {
	matches, err := resolver.FilesByGlob(glob)
	if err != nil {
		log.WithFields("glob", glob, "error", err).Debug("error searching for license files")
		return nil
	}

	var found []pkg.License
	for _, match := range matches {
		if !IsLicenseFile(path.Base(match.Path())) {
			continue
		}
		found = append(found, readFromResolver(ctx, resolver, match)...)
	}
	return found
}
// NewFromValues creates License objects from the provided license values. When no locations
// are given it delegates to pkg.NewLicensesFromValuesWithContext; otherwise it creates one
// license per non-empty value, each associated with all of the provided locations.
func NewFromValues(ctx context.Context, locations []file.Location, values ...string) []pkg.License {
	if len(locations) == 0 {
		return pkg.NewLicensesFromValuesWithContext(ctx, values...)
	}

	var created []pkg.License
	for _, v := range values {
		if v != "" {
			created = append(created, pkg.NewLicenseFromLocationsWithContext(ctx, v, locations...))
		}
	}
	return created
}
// readFromResolver reads the file at location through the resolver and returns the licenses
// detected in its contents. When the contents cannot be obtained (an error or a nil reader)
// the problem is logged at trace level and nil is returned.
func readFromResolver(ctx context.Context, resolver file.Resolver, location file.Location) []pkg.License {
	metadataContents, err := resolver.FileContentsByLocation(location)
	if err != nil || metadataContents == nil {
		log.WithFields("error", err, "path", location.Path()).Trace("unable to read license file contents")
		return nil
	}
	defer internal.CloseAndLogError(metadataContents, location.Path())
	return pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(location, metadataContents))
}

View File

@ -0,0 +1,171 @@
package licenses
import (
"context"
"testing"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/pkg"
)
// scanner is the license scanner used by all tests; it is built once, when the package
// is initialized, by getScanner
var scanner = getScanner()
// Test_FindRelativeLicenses checks RelativeToPackage against the testdata directory: existing
// licenses are kept as-is, an empty resolver yields no licenses, and a package whose primary
// evidence is source.txt picks up the license found next to it.
func Test_FindRelativeLicenses(t *testing.T) {
	resolver := fileresolver.NewFromUnindexedDirectory("testdata")
	sourceTxtResolved, err := resolver.FilesByPath("source.txt")
	require.NoError(t, err)
	// fail with a clear message instead of panicking on the index below if the fixture is missing
	require.NotEmpty(t, sourceTxtResolved)
	sourceTxt := file.NewLocationSet(sourceTxtResolved[0].WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation))

	tests := []struct {
		name     string
		resolver file.Resolver
		p        pkg.Package
		expected pkg.LicenseSet
	}{
		{
			name:     "existing license",
			resolver: resolver,
			p: pkg.Package{
				Locations: sourceTxt,
				Licenses:  pkg.NewLicenseSet(pkg.NewLicense("GPL-2.0")),
			},
			expected: pkg.NewLicenseSet(pkg.NewLicense("GPL-2.0")),
		},
		{
			name:     "no licenses",
			resolver: fileresolver.Empty{},
			p: pkg.Package{
				Locations: sourceTxt,
			},
			expected: pkg.NewLicenseSet(),
		},
		{
			name:     "found relative license",
			resolver: resolver,
			p: pkg.Package{
				Locations: sourceTxt,
			},
			expected: pkg.NewLicenseSet(pkg.NewLicense("MIT")),
		},
	}

	// the license scanner is looked up from the context when license file contents are read
	ctx := context.TODO()
	ctx = licenses.SetContextLicenseScanner(ctx, scanner)

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := RelativeToPackage(ctx, tt.resolver, tt.p)
			require.Equal(t, licenseNames(tt.expected.ToSlice()), licenseNames(got.Licenses.ToSlice()))
		})
	}
}
func Test_Finders(t *testing.T) {
resolver := fileresolver.NewFromUnindexedDirectory("testdata")
// prepare context with license scanner
ctx := context.TODO()
ctx = licenses.SetContextLicenseScanner(ctx, scanner)
// resolve known files
licenseLocs, err := resolver.FilesByPath("LICENSE")
require.NoError(t, err)
require.NotEmpty(t, licenseLocs)
licenseLoc := licenseLocs[0]
sourceLocs, err := resolver.FilesByPath("source.txt")
require.NoError(t, err)
require.NotEmpty(t, sourceLocs)
sourceLoc := sourceLocs[0]
tests := []struct {
name string
finder func(t *testing.T) []pkg.License
expected []string
}{
{
name: "FindAtLocations finds LICENSE content",
finder: func(t *testing.T) []pkg.License {
return FindAtLocations(ctx, resolver, licenseLoc)
},
expected: []string{"MIT"},
},
{
name: "FindAtLocations with empty resolver returns none",
finder: func(t *testing.T) []pkg.License {
return FindAtLocations(ctx, fileresolver.Empty{}, licenseLoc)
},
},
{
name: "FindAtPaths finds LICENSE by path",
finder: func(t *testing.T) []pkg.License {
return FindAtPaths(ctx, resolver, "LICENSE")
},
expected: []string{"MIT"},
},
{
name: "FindInDirs finds LICENSE in directory",
finder: func(t *testing.T) []pkg.License {
return FindInDirs(ctx, resolver, ".")
},
expected: []string{"MIT"},
},
{
name: "FindRelativeToLocations finds LICENSE relative to source.txt",
finder: func(t *testing.T) []pkg.License {
return FindRelativeToLocations(ctx, resolver, sourceLoc)
},
expected: []string{"MIT"},
},
{
name: "FindByGlob finds LICENSE with glob",
finder: func(t *testing.T) []pkg.License {
return FindByGlob(ctx, resolver, "*")
},
expected: []string{"MIT"},
},
{
name: "FindByGlob finds LICENSE with recursive glob",
finder: func(t *testing.T) []pkg.License {
return FindByGlob(ctx, resolver, "**/*")
},
expected: []string{"MIT"},
},
{
name: "NewFromValues with locations returns license values",
finder: func(t *testing.T) []pkg.License {
return NewFromValues(ctx, []file.Location{licenseLoc}, "MIT")
},
expected: []string{"MIT"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := tt.finder(t)
require.Equal(t, tt.expected, licenseNames(got))
})
}
}
// licenseNames returns the SPDXExpression of each license, in order. The result is nil
// (not an empty slice) when no licenses are given.
func licenseNames(slice []pkg.License) []string {
	var names []string
	for i := range slice {
		names = append(names, slice[i].SPDXExpression)
	}
	return names
}
// getScanner builds the default license scanner and panics if it cannot be created
func getScanner() licenses.Scanner {
	defaultScanner, err := licenses.NewDefaultScanner()
	if err == nil {
		return defaultScanner
	}
	panic(err)
}

View File

@ -0,0 +1,62 @@
package licenses
import (
"math"
"regexp"
"slices"
"strings"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/licenses"
)
// licenseRegexp matches, case-insensitively, file names that start with LICENSE or LICENCE
// (optionally prefixed with "UN" or "MIT-"), COPYING, or NOTICE, followed by anything.
// The S/C alternative is written as the character class [SC]; a "|" inside a character
// class is a literal pipe, not an alternation.
var licenseRegexp = regexp.MustCompile(`^(?i)(?:(?:UN|MIT-)?LICEN[SC]E|COPYING|NOTICE).*$`)
// lowerFileNames is the set of names returned by licenses.FileNames(), each lowercased
var lowerFileNames = func() *strset.Set {
	known := licenses.FileNames()
	set := strset.New()
	for i := range known {
		set.Add(strings.ToLower(known[i]))
	}
	return set
}()
// lowerFileNamesSorted holds the entries of lowerFileNames as a slice in ascending order
var lowerFileNamesSorted = func() []string {
	sorted := lowerFileNames.List()
	slices.Sort(sorted)
	return sorted
}()
// minLength is the length of the shortest entry in lowerFileNamesSorted (math.MaxInt when
// there are none); extraFileNames holds the entries that licenseRegexp does not already
// match, so they still need an explicit comparison after the regex check
var minLength, extraFileNames = func() (int, []string) {
	shortest := math.MaxInt
	var unmatched []string
	for _, candidate := range lowerFileNamesSorted {
		shortest = min(shortest, len(candidate))
		if !licenseRegexp.MatchString(candidate) {
			unmatched = append(unmatched, candidate)
		}
	}
	return shortest, unmatched
}()
// IsLicenseFile returns true if the name matches known license file name patterns.
// Names shorter than minLength are rejected outright; otherwise the name is accepted when
// licenseRegexp matches it or when it equals one of extraFileNames, ignoring case.
func IsLicenseFile(name string) bool {
	switch {
	case len(name) < minLength:
		return false
	case licenseRegexp.MatchString(name):
		return true
	}
	return slices.ContainsFunc(extraFileNames, func(known string) bool {
		return strings.EqualFold(known, name)
	})
}

View File

@ -0,0 +1,45 @@
package licenses
import (
"testing"
)
// Test_IsLicenseFile checks IsLicenseFile against file names that should and should not be
// recognized as license files; each case runs as a parallel subtest.
func Test_IsLicenseFile(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  bool
	}{
		// positive cases (should be detected as license files)
		{name: "plain LICENSE", input: "LICENSE", want: true},
		{name: "lowercase license", input: "license", want: true},
		{name: "license with extension", input: "LICENSE.txt", want: true},
		{name: "mixed case", input: "LiCeNsE", want: true},
		{name: "copying", input: "COPYING", want: true},
		{name: "AL2.0", input: "AL2.0", want: true},
		{name: "notice", input: "NOTICE", want: true},
		{name: "mit-license", input: "MIT-License", want: true},
		{name: "unlicense", input: "UNLICENSE", want: true},
		{name: "licence variant", input: "LICENCE", want: true},
		{name: "license markdown", input: "license.md", want: true},

		// negative cases (should NOT be detected)
		{name: "AL1.0", input: "AL1.0", want: false},
		{name: "readme", input: "README", want: false},
		{name: "readme with ext", input: "README.md", want: false},
		{name: "not a license", input: "not_a_license", want: false},
		{name: "licensor (prefix-like but not)", input: "LICENSOR", want: false},
		{name: "too short (below minLength)", input: "a", want: false},
	}

	for _, tc := range cases {
		tc := tc // per-iteration copy for the parallel subtest closure
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			if got := IsLicenseFile(tc.input); got != tc.want {
				t.Fatalf("IsLicenseFile(%q) = %v, want %v", tc.input, got, tc.want)
			}
		})
	}
}

View File

@ -0,0 +1,7 @@
Copyright 2025 Some Place, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1 @@
a source file

View File

@ -26,6 +26,7 @@ import (
"github.com/anchore/syft/internal/relationship"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
@ -81,6 +82,7 @@ func NewCatalogTester() *CatalogTester {
locationComparer: cmptest.DefaultLocationComparer,
licenseComparer: cmptest.DefaultLicenseComparer,
packageStringer: stringPackage,
resolver: fileresolver.Empty{},
ignoreUnfulfilledPathResponses: map[string][]string{
"FilesByPath": {
// most catalogers search for a linux release, which will not be fulfilled in testing

View File

@ -17,13 +17,13 @@ import (
"github.com/anchore/syft/internal"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
"github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven"
)
@ -63,7 +63,6 @@ type archiveParser struct {
detectNested bool
cfg ArchiveCatalogerConfig
maven *maven.Resolver
licenseScanner licenses.Scanner
}
type genericArchiveParserAdapter struct {
@ -81,7 +80,7 @@ func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ f
// processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives
func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg)
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {
@ -100,12 +99,7 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {
// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
// and parse nested archives or ignore them.
func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
licenseScanner, err := licenses.ContextLicenseScanner(ctx)
if err != nil {
return nil, nil, fmt.Errorf("could not build license scanner for java archive parser: %w", err)
}
func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
// fetch the last element of the virtual path
virtualElements := strings.Split(reader.Path(), ":")
currentFilepath := virtualElements[len(virtualElements)-1]
@ -129,7 +123,6 @@ func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, d
detectNested: detectNested,
cfg: cfg,
maven: maven.NewResolver(nil, cfg.mavenConfig()),
licenseScanner: licenseScanner,
}, cleanupFn, nil
}
@ -569,12 +562,16 @@ func getDigestsFromArchive(ctx context.Context, archivePath string) ([]file.Dige
}
func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.License, error) {
var out []pkg.License
for _, filename := range licenses.FileNames() {
licenseMatches := j.fileManifest.GlobMatch(true, "/META-INF/"+filename)
if len(licenseMatches) == 0 {
// Try the root directory if it's not in META-INF
licenseMatches = j.fileManifest.GlobMatch(true, "/"+filename)
// prefer identified licenses, fall back to unknown
var identified []pkg.License
var unidentified []pkg.License
for _, glob := range []string{"/META-INF/*", "/*"} {
var licenseMatches []string
for _, f := range j.fileManifest.GlobMatch(true, glob) {
if licenses.IsLicenseFile(path.Base(f)) {
licenseMatches = append(licenseMatches, f)
}
}
if len(licenseMatches) > 0 {
@ -586,15 +583,28 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
for _, licenseMatch := range licenseMatches {
licenseContents := contents[licenseMatch]
r := strings.NewReader(licenseContents)
lics := pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(j.location, io.NopCloser(r)))
if len(lics) > 0 {
out = append(out, lics...)
}
foundLicenses := pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(j.location, io.NopCloser(r)))
for _, l := range foundLicenses {
if l.SPDXExpression != "" {
identified = append(identified, l)
} else {
unidentified = append(unidentified, l)
}
}
}
return out, nil
// prefer licenses found in /META-INF
if len(identified) > 0 {
break
}
}
}
if len(identified) == 0 {
return unidentified, nil
}
return identified, nil
}
func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
@ -799,7 +809,7 @@ func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool {
switch {
case !ok:
log.WithFields("package", p.String()).Trace("unable to extract java metadata to check for matching package identity for package: %s", p.Name)
case !parentOk:
default: // !parentOk
log.WithFields("package", parentPkg.String()).Trace("unable to extract java metadata to check for matching package identity for package: %s", parentPkg.Name)
}
// if we can't extract metadata, we can check for matching identities via the package name

View File

@ -73,7 +73,6 @@ func TestSearchMavenForLicenses(t *testing.T) {
// setup parser
ap, cleanupFn, err := newJavaArchiveParser(
ctx,
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -374,7 +373,6 @@ func TestParseJar(t *testing.T) {
UseMavenLocalRepository: false,
}
parser, cleanupFn, err := newJavaArchiveParser(
ctx,
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -1478,7 +1476,6 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
}
func Test_deterministicMatchingPomProperties(t *testing.T) {
ctx := pkgtest.Context()
tests := []struct {
fixture string
expected maven.ID
@ -1503,7 +1500,6 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
require.NoError(t, err)
parser, cleanupFn, err := newJavaArchiveParser(
ctx,
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -1641,7 +1637,6 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) {
ctx := context.TODO()
// setup parser
ap, cleanupFn, err := newJavaArchiveParser(
ctx,
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,

View File

@ -11,6 +11,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
"github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven"
)
@ -65,7 +66,7 @@ func (p pomXMLCataloger) Catalog(ctx context.Context, fileResolver file.Resolver
continue
}
resolved[id] = mainPkg
pkgs = append(pkgs, *mainPkg)
pkgs = append(pkgs, licenses.RelativeToPackage(ctx, fileResolver, *mainPkg))
}
// catalog all dependencies
@ -117,7 +118,7 @@ func newPackageFromMavenPom(ctx context.Context, r *maven.Resolver, pom *maven.P
if err != nil {
log.Tracef("error resolving licenses: %v", err)
}
licenses := toPkgLicenses(ctx, &location, pomLicenses)
pkgLicenses := toPkgLicenses(ctx, &location, pomLicenses)
m := pkg.JavaArchive{
PomProject: &pkg.JavaPomProject{
@ -137,7 +138,7 @@ func newPackageFromMavenPom(ctx context.Context, r *maven.Resolver, pom *maven.P
Locations: file.NewLocationSet(
location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),
Licenses: pkg.NewLicenseSet(licenses...),
Licenses: pkg.NewLicenseSet(pkgLicenses...),
Language: pkg.Java,
Type: pkg.JavaPkg,
FoundBy: pomCatalogerName,
@ -231,7 +232,7 @@ func newPackageFromDependency(ctx context.Context, r *maven.Resolver, pom *maven
id := r.ResolveDependencyID(ctx, pom, dep)
var err error
var licenses []pkg.License
var pkgLicenses []pkg.License
dependencyPom, depErr := r.FindPom(ctx, id.GroupID, id.ArtifactID, id.Version)
if depErr != nil {
err = errors.Join(err, depErr)
@ -240,7 +241,7 @@ func newPackageFromDependency(ctx context.Context, r *maven.Resolver, pom *maven
var pomProject *pkg.JavaPomProject
if dependencyPom != nil {
depLicenses, _ := r.ResolveLicenses(ctx, dependencyPom)
licenses = append(licenses, toPkgLicenses(ctx, nil, depLicenses)...)
pkgLicenses = append(pkgLicenses, toPkgLicenses(ctx, nil, depLicenses)...)
pomProject = &pkg.JavaPomProject{
Parent: pomParent(ctx, r, dependencyPom),
GroupID: id.GroupID,
@ -265,7 +266,7 @@ func newPackageFromDependency(ctx context.Context, r *maven.Resolver, pom *maven
Name: id.ArtifactID,
Version: id.Version,
Locations: file.NewLocationSet(locations...),
Licenses: pkg.NewLicenseSet(licenses...),
Licenses: pkg.NewLicenseSet(pkgLicenses...),
PURL: packageURL(id.ArtifactID, id.Version, m),
Language: pkg.Java,
Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?

View File

@ -16,9 +16,10 @@ import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
func newPackageJSONPackage(ctx context.Context, u packageJSON, indexLocation file.Location) pkg.Package {
func newPackageJSONPackage(ctx context.Context, resolver file.Resolver, u packageJSON, indexLocation file.Location) pkg.Package {
licenseCandidates, err := u.licensesFromJSON()
if err != nil {
log.Debugf("unable to extract licenses from javascript package.json: %+v", err)
@ -79,6 +80,9 @@ func newPackageJSONPackage(ctx context.Context, u packageJSON, indexLocation fil
p.SetID()
// if license not specified, search for license files
p = licenses.RelativeToPackage(ctx, resolver, p)
return p
}
@ -103,8 +107,7 @@ func newPackageLockV1Package(ctx context.Context, cfg CatalogerConfig, resolver
if cfg.SearchRemoteLicenses {
license, err := getLicenseFromNpmRegistry(cfg.NPMBaseURL, name, version)
if err == nil && license != "" {
licenses := pkg.NewLicensesFromValuesWithContext(ctx, license)
licenseSet = pkg.NewLicenseSet(licenses...)
licenseSet = pkg.NewLicenseSet(pkg.NewLicensesFromValuesWithContext(ctx, license)...)
}
if err != nil {
log.Debugf("unable to extract licenses from javascript package-lock.json for package %s:%s: %+v", name, version, err)
@ -136,8 +139,7 @@ func newPackageLockV2Package(ctx context.Context, cfg CatalogerConfig, resolver
} else if cfg.SearchRemoteLicenses {
license, err := getLicenseFromNpmRegistry(cfg.NPMBaseURL, name, u.Version)
if err == nil && license != "" {
licenses := pkg.NewLicensesFromValuesWithContext(ctx, license)
licenseSet = pkg.NewLicenseSet(licenses...)
licenseSet = pkg.NewLicenseSet(pkg.NewLicensesFromValuesWithContext(ctx, license)...)
}
if err != nil {
log.Debugf("unable to extract licenses from javascript package-lock.json for package %s:%s: %+v", name, u.Version, err)
@ -167,8 +169,7 @@ func newPnpmPackage(ctx context.Context, cfg CatalogerConfig, resolver file.Reso
if cfg.SearchRemoteLicenses {
license, err := getLicenseFromNpmRegistry(cfg.NPMBaseURL, name, version)
if err == nil && license != "" {
licenses := pkg.NewLicensesFromValuesWithContext(ctx, license)
licenseSet = pkg.NewLicenseSet(licenses...)
licenseSet = pkg.NewLicenseSet(pkg.NewLicensesFromValuesWithContext(ctx, license)...)
}
if err != nil {
log.Debugf("unable to extract licenses from javascript pnpm-lock.yaml for package %s:%s: %+v", name, version, err)
@ -196,8 +197,7 @@ func newYarnLockPackage(ctx context.Context, cfg CatalogerConfig, resolver file.
if cfg.SearchRemoteLicenses {
license, err := getLicenseFromNpmRegistry(cfg.NPMBaseURL, name, version)
if err == nil && license != "" {
licenses := pkg.NewLicensesFromValuesWithContext(ctx, license)
licenseSet = pkg.NewLicenseSet(licenses...)
licenseSet = pkg.NewLicenseSet(pkg.NewLicensesFromValuesWithContext(ctx, license)...)
}
if err != nil {
log.Debugf("unable to extract licenses from javascript yarn.lock for package %s:%s: %+v", name, version, err)
@ -305,11 +305,11 @@ func addLicenses(name string, resolver file.Resolver, location file.Location) (a
}
for _, l := range locations {
licenses, err := parseLicensesFromLocation(l, resolver, pkgFile)
foundLicenses, err := parseLicensesFromLocation(l, resolver, pkgFile)
if err != nil {
return allLicenses
}
allLicenses = append(allLicenses, licenses...)
allLicenses = append(allLicenses, foundLicenses...)
}
return allLicenses
@ -336,12 +336,12 @@ func parseLicensesFromLocation(l file.Location, resolver file.Resolver, pkgFile
return nil, err
}
licenses, err := pkgJSON.licensesFromJSON()
out, err := pkgJSON.licensesFromJSON()
if err != nil {
log.Debugf("error getting licenses from %s: %v", pkgFile, err)
return nil, err
}
return licenses, nil
return out, nil
}
// packageURL returns the PURL for the specific NPM package (see https://github.com/package-url/purl-spec)

View File

@ -57,7 +57,7 @@ type repository struct {
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)
// parsePackageJSON parses a package.json and returns the discovered JavaScript packages.
func parsePackageJSON(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parsePackageJSON(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package
dec := json.NewDecoder(reader)
@ -73,7 +73,7 @@ func parsePackageJSON(ctx context.Context, _ file.Resolver, _ *generic.Environme
// a compliance filter later will remove these packages based on compliance rules
pkgs = append(
pkgs,
newPackageJSONPackage(ctx, p, reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
newPackageJSONPackage(ctx, resolver, p, reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
)
}

View File

@ -2,14 +2,19 @@ package lua
import (
"context"
"path"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
func newLuaRocksPackage(ctx context.Context, u luaRocksPackage, indexLocation file.Location) pkg.Package {
func newLuaRocksPackage(ctx context.Context, resolver file.Resolver, u luaRocksPackage, indexLocation file.Location) pkg.Package {
license := pkg.NewLicensesFromLocationWithContext(ctx, indexLocation, u.License)
if len(license) == 0 {
license = licenses.FindInDirs(ctx, resolver, path.Dir(indexLocation.Path()))
}
p := pkg.Package{
Name: u.Name,
Version: u.Version,

View File

@ -27,7 +27,7 @@ type repository struct {
}
// parseRockspec parses a package.rockspec and returns the discovered Lua packages.
func parseRockspec(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parseRockspec(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
doc, err := parseRockspecData(reader)
if err != nil {
log.WithFields("error", err).Trace("unable to parse Rockspec app")
@ -65,6 +65,7 @@ func parseRockspec(ctx context.Context, _ file.Resolver, _ *generic.Environment,
p := newLuaRocksPackage(
ctx,
resolver,
luaRocksPackage{
Name: name,
Version: version,

View File

@ -15,6 +15,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
// storeCataloger finds package outputs installed in the Nix store location (/nix/store/*).
@ -58,11 +59,11 @@ func (c storeCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]
}
}
pkgs, rels := c.finalizeStorePackages(prototypes, drvs)
pkgs, rels := c.finalizeStorePackages(ctx, resolver, prototypes, drvs)
return pkgs, rels, err
}
func (c storeCataloger) finalizeStorePackages(pkgPrototypes []nixStorePackage, drvs *derivations) ([]pkg.Package, []artifact.Relationship) {
func (c storeCataloger) finalizeStorePackages(ctx context.Context, resolver file.Resolver, pkgPrototypes []nixStorePackage, drvs *derivations) ([]pkg.Package, []artifact.Relationship) {
var pkgs []pkg.Package
var pkgByStorePath = make(map[string]pkg.Package)
for _, pp := range pkgPrototypes {
@ -71,6 +72,7 @@ func (c storeCataloger) finalizeStorePackages(pkgPrototypes []nixStorePackage, d
}
p := newNixStorePackage(pp, c.name)
p = licenses.RelativeToPackage(ctx, resolver, p)
pkgs = append(pkgs, p)
pkgByStorePath[pp.Location.RealPath] = p
}

View File

@ -7,19 +7,18 @@ import (
"io"
"path"
"path/filepath"
"sort"
"strings"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
// parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents. Contained
@ -249,7 +248,7 @@ func assembleEggOrWheelMetadata(resolver file.Resolver, metadataLocation file.Lo
}
func findLicenses(ctx context.Context, resolver file.Resolver, m parsedData) pkg.LicenseSet {
var licenseSet pkg.LicenseSet
var out []pkg.License
licenseLocations := file.NewLocationSet()
if m.LicenseFilePath != "" {
@ -263,16 +262,12 @@ func findLicenses(ctx context.Context, resolver file.Resolver, m parsedData) pkg
switch {
case m.LicenseExpression != "" || m.Licenses != "":
licenseSet = getLicenseSetFromValues(ctx, licenseLocations.ToSlice(), m.LicenseExpression, m.Licenses)
out = licenses.NewFromValues(ctx, licenseLocations.ToSlice(), m.LicenseExpression, m.Licenses)
case !licenseLocations.Empty():
licenseSet = getLicenseSetFromFiles(ctx, resolver, licenseLocations.ToSlice()...)
out = licenses.FindAtLocations(ctx, resolver, licenseLocations.ToSlice()...)
default:
// search for known license paths from RECORDS file
licenseNames := strset.New()
for _, n := range licenses.FileNames() {
licenseNames.Add(strings.ToLower(n))
}
parent := path.Base(path.Dir(m.DistInfoLocation.Path()))
candidatePaths := strset.New()
for _, f := range m.Files {
@ -280,58 +275,12 @@ func findLicenses(ctx context.Context, resolver file.Resolver, m parsedData) pkg
continue
}
if licenseNames.Has(strings.ToLower(filepath.Base(f.Path))) {
if licenses.IsLicenseFile(filepath.Base(f.Path)) {
candidatePaths.Add(path.Join(m.SitePackagesRootPath, f.Path))
}
}
paths := candidatePaths.List()
sort.Strings(paths)
locationSet := file.NewLocationSet()
for _, p := range paths {
locs, err := resolver.FilesByPath(p)
if err != nil {
log.WithFields("error", err, "path", p).Trace("unable to resolve python license in dist-info")
continue
out = licenses.FindAtPaths(ctx, resolver, candidatePaths.List()...)
}
locationSet.Add(locs...)
}
licenseSet = getLicenseSetFromFiles(ctx, resolver, locationSet.ToSlice()...)
}
return licenseSet
}
// getLicenseSetFromValues builds a license set from raw license value strings.
//
// When no locations are given, every value is passed straight to
// pkg.NewLicensesFromValuesWithContext (no filtering is done here). Otherwise each
// non-empty value is turned into one license that is associated with all of the
// given locations.
func getLicenseSetFromValues(ctx context.Context, locations []file.Location, licenseValues ...string) pkg.LicenseSet {
if len(locations) == 0 {
return pkg.NewLicenseSet(pkg.NewLicensesFromValuesWithContext(ctx, licenseValues...)...)
}
licenseSet := pkg.NewLicenseSet()
for _, value := range licenseValues {
// blank values carry no license information, so they are skipped
if value == "" {
continue
}
licenseSet.Add(pkg.NewLicenseFromLocationsWithContext(ctx, value, locations...))
}
return licenseSet
}
// getLicenseSetFromFiles gathers, into a single license set, the licenses that
// getLicenseSetFromFile returns for each of the given locations.
func getLicenseSetFromFiles(ctx context.Context, resolver file.Resolver, locations ...file.Location) pkg.LicenseSet {
licenseSet := pkg.NewLicenseSet()
for _, loc := range locations {
// each location may contribute zero or more licenses
licenseSet.Add(getLicenseSetFromFile(ctx, resolver, loc)...)
}
return licenseSet
}
func getLicenseSetFromFile(ctx context.Context, resolver file.Resolver, location file.Location) []pkg.License {
metadataContents, err := resolver.FileContentsByLocation(location)
if err != nil {
log.WithFields("error", err, "path", location.Path()).Trace("unable to read file contents")
return nil
}
defer internal.CloseAndLogError(metadataContents, location.Path())
return pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(location, metadataContents))
return pkg.NewLicenseSet(out...)
}

View File

@ -11,6 +11,7 @@ import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
/* some examples of license strings found in DESCRIPTION files:
@ -29,13 +30,14 @@ License: Part of R 4.3.0
License: Unlimited
*/
// parseDescriptionFile reads a DESCRIPTION file from reader and returns the single
// package it describes. It returns no packages (and no error) when the resulting
// package has an empty name or an empty version. It never returns relationships.
// NOTE(review): two signature lines follow (one discarding the resolver, one naming
// it); they look like the before/after lines of a diff listing — confirm which is live.
func parseDescriptionFile(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parseDescriptionFile(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// extract the fields from the file, then convert them into the parsed package data
values := extractFieldsFromDescriptionFile(reader)
m := parseDataFromDescriptionMap(values)
p := newPackage(ctx, m, []file.Location{reader.Location}...)
// a package missing either its name or its version is dropped entirely
if p.Name == "" || p.Version == "" {
return nil, nil, nil
}
// presumably adds licenses discovered in files located relative to the package —
// confirm against licenses.RelativeToPackage
p = licenses.RelativeToPackage(ctx, resolver, p)
return []pkg.Package{p}, nil, nil
}

View File

@ -10,6 +10,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/pkg"
)
@ -58,7 +59,7 @@ func Test_parseDescriptionFile(t *testing.T) {
Location: file.NewLocation(tt.fixture),
ReadCloser: f,
}
got, _, err := parseDescriptionFile(context.Background(), nil, nil, input)
got, _, err := parseDescriptionFile(context.Background(), fileresolver.Empty{}, nil, input)
assert.NoError(t, err)
for _, assertion := range tt.assertions {
assertion(t, got)

View File

@ -6,6 +6,7 @@ import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
func newGemfileLockPackage(name, version string, locations ...file.Location) pkg.Package {
@ -23,7 +24,7 @@ func newGemfileLockPackage(name, version string, locations ...file.Location) pkg
return p
}
func newGemspecPackage(ctx context.Context, m gemData, gemSpecLocation file.Location) pkg.Package {
func newGemspecPackage(ctx context.Context, resolver file.Resolver, m gemData, gemSpecLocation file.Location) pkg.Package {
p := pkg.Package{
Name: m.Name,
Version: m.Version,
@ -37,6 +38,8 @@ func newGemspecPackage(ctx context.Context, m gemData, gemSpecLocation file.Loca
p.SetID()
p = licenses.RelativeToPackage(ctx, resolver, p)
return p
}

View File

@ -66,7 +66,7 @@ func processList(s string) []string {
}
// parseGemSpecEntries parses the gemspec file and returns the packages and relationships found.
func parseGemSpecEntries(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parseGemSpecEntries(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package
var fields = make(map[string]interface{})
scanner := bufio.NewScanner(reader)
@ -106,6 +106,7 @@ func parseGemSpecEntries(ctx context.Context, _ file.Resolver, _ *generic.Enviro
pkgs,
newGemspecPackage(
ctx,
resolver,
metadata,
reader.Location,
),

View File

@ -1,17 +1,21 @@
package swipl
import (
"context"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
func newSwiplPackPackage(m pkg.SwiplPackEntry, locations ...file.Location) pkg.Package {
func newSwiplPackPackage(ctx context.Context, resolver file.Resolver, m pkg.SwiplPackEntry, locations ...file.Location) pkg.Package {
p := pkg.Package{
Name: m.Name,
Version: m.Version,
PURL: swiplpackPackageURL(m.Name, m.Version),
Locations: file.NewLocationSet(locations...),
Licenses: pkg.NewLicenseSet(licenses.FindRelativeToLocations(ctx, resolver, locations...)...),
Type: pkg.SwiplPackPkg,
Language: pkg.Swipl,
Metadata: m,

View File

@ -12,7 +12,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
func parsePackPackage(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parsePackPackage(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package
nameRe := regexp.MustCompile(`name\(\s*'?([^')]+)'?\s*\)`)
@ -61,6 +61,8 @@ func parsePackPackage(_ context.Context, _ file.Resolver, _ *generic.Environment
pkgs = append(
pkgs,
newSwiplPackPackage(
ctx,
resolver,
entry,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),

View File

@ -5,9 +5,10 @@ import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
func newWordpressPluginPackage(ctx context.Context, name, version string, m pluginData, location file.Location) pkg.Package {
func newWordpressPluginPackage(ctx context.Context, resolver file.Resolver, name, version string, m pluginData, location file.Location) pkg.Package {
meta := pkg.WordpressPluginEntry{
PluginInstallDirectory: m.PluginInstallDirectory,
Author: m.Author,
@ -25,6 +26,8 @@ func newWordpressPluginPackage(ctx context.Context, name, version string, m plug
if len(m.Licenses) > 0 {
p.Licenses = pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, m.Licenses[0]))
} else {
p = licenses.RelativeToPackage(ctx, resolver, p)
}
p.SetID()

View File

@ -38,7 +38,7 @@ type pluginData struct {
pkg.WordpressPluginEntry `mapstructure:",squash" json:",inline"`
}
func parseWordpressPluginFiles(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parseWordpressPluginFiles(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package
buffer := make([]byte, contentBufferSize)
@ -81,6 +81,7 @@ func parseWordpressPluginFiles(ctx context.Context, _ file.Resolver, _ *generic.
pkgs,
newWordpressPluginPackage(
ctx,
resolver,
name.(string),
version.(string),
metadata,