feat: 3626 add option enable license content; disable by default (#3631)

---------
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Angelo Phillips 2025-02-05 15:41:03 -05:00 committed by GitHub
parent 7bab6e9851
commit e584c9f416
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 436 additions and 98 deletions

View File

@ -33,6 +33,7 @@ type Catalog struct {
DefaultCatalogers []string `yaml:"default-catalogers" json:"default-catalogers" mapstructure:"default-catalogers"` DefaultCatalogers []string `yaml:"default-catalogers" json:"default-catalogers" mapstructure:"default-catalogers"`
SelectCatalogers []string `yaml:"select-catalogers" json:"select-catalogers" mapstructure:"select-catalogers"` SelectCatalogers []string `yaml:"select-catalogers" json:"select-catalogers" mapstructure:"select-catalogers"`
Package packageConfig `yaml:"package" json:"package" mapstructure:"package"` Package packageConfig `yaml:"package" json:"package" mapstructure:"package"`
License licenseConfig `yaml:"license" json:"license" mapstructure:"license"`
File fileConfig `yaml:"file" json:"file" mapstructure:"file"` File fileConfig `yaml:"file" json:"file" mapstructure:"file"`
Scope string `yaml:"scope" json:"scope" mapstructure:"scope"` Scope string `yaml:"scope" json:"scope" mapstructure:"scope"`
Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
@ -69,6 +70,7 @@ func DefaultCatalog() Catalog {
Compliance: defaultComplianceConfig(), Compliance: defaultComplianceConfig(),
Scope: source.SquashedScope.String(), Scope: source.SquashedScope.String(),
Package: defaultPackageConfig(), Package: defaultPackageConfig(),
License: defaultLicenseConfig(),
LinuxKernel: defaultLinuxKernelConfig(), LinuxKernel: defaultLinuxKernelConfig(),
Golang: defaultGolangConfig(), Golang: defaultGolangConfig(),
Java: defaultJavaConfig(), Java: defaultJavaConfig(),
@ -89,6 +91,7 @@ func (cfg Catalog) ToSBOMConfig(id clio.Identification) *syft.CreateSBOMConfig {
WithUnknownsConfig(cfg.ToUnknownsConfig()). WithUnknownsConfig(cfg.ToUnknownsConfig()).
WithSearchConfig(cfg.ToSearchConfig()). WithSearchConfig(cfg.ToSearchConfig()).
WithPackagesConfig(cfg.ToPackagesConfig()). WithPackagesConfig(cfg.ToPackagesConfig()).
WithLicenseConfig(cfg.ToLicenseConfig()).
WithFilesConfig(cfg.ToFilesConfig()). WithFilesConfig(cfg.ToFilesConfig()).
WithCatalogerSelection( WithCatalogerSelection(
cataloging.NewSelectionRequest(). cataloging.NewSelectionRequest().
@ -146,6 +149,13 @@ func (cfg Catalog) ToFilesConfig() filecataloging.Config {
} }
} }
// ToLicenseConfig translates the application-level license options held on the
// Catalog into the cataloging.LicenseConfig value consumed by the syft library.
func (cfg Catalog) ToLicenseConfig() cataloging.LicenseConfig {
	opts := cfg.License

	var out cataloging.LicenseConfig
	out.IncludeUnkownLicenseContent = opts.IncludeUnknownLicenseContent
	out.Coverage = opts.LicenseCoverage
	return out
}
func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config { func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
archiveSearch := cataloging.ArchiveSearchConfig{ archiveSearch := cataloging.ArchiveSearchConfig{
IncludeIndexedArchives: cfg.Package.SearchIndexedArchives, IncludeIndexedArchives: cfg.Package.SearchIndexedArchives,

View File

@ -0,0 +1,28 @@
package options
import (
"github.com/anchore/clio"
)
// licenseConfig holds the user-configurable (yaml/json/mapstructure) options
// that control license detection. Defaults come from defaultLicenseConfig.
type licenseConfig struct {
// IncludeUnknownLicenseContent controls whether the text of a license is
// included in the SBOM when no valid SPDX ID could be determined for it.
IncludeUnknownLicenseContent bool `yaml:"include-unknown-license-content" json:"include-unknown-license-content" mapstructure:"include-unknown-license-content"`
// LicenseCoverage is the coverage threshold, expressed as a percentage, that
// is forwarded to the license scanner (75 by default).
LicenseCoverage float64 `yaml:"license-coverage" json:"license-coverage" mapstructure:"license-coverage"`
}
// Compile-time assertion that *licenseConfig satisfies clio.FieldDescriber.
var _ interface {
clio.FieldDescriber
} = (*licenseConfig)(nil)
// DescribeFields registers a human-readable description for each license
// option with the supplied clio.FieldDescriptionSet, keyed by the address of
// the field being described.
// NOTE(review): presumably clio surfaces these strings in generated
// configuration help — confirm against the clio documentation.
func (o *licenseConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
descriptions.Add(&o.IncludeUnknownLicenseContent, `include the content of a license in the SBOM when syft
cannot determine a valid SPDX ID for the given license`)
descriptions.Add(&o.LicenseCoverage, `adjust the percent as a fraction of the total text, in normalized words, that
matches any valid license for the given inputs, expressed as a percentage across all of the licenses matched.`)
}
// defaultLicenseConfig returns the license options used when the user supplies
// none: unknown license content is omitted and the coverage threshold is 75.
func defaultLicenseConfig() licenseConfig {
	var defaults licenseConfig
	defaults.IncludeUnknownLicenseContent = false
	defaults.LicenseCoverage = 75
	return defaults
}

View File

@ -10,9 +10,9 @@ func SetContextLicenseScanner(ctx context.Context, s Scanner) context.Context {
return context.WithValue(ctx, licenseScannerKey{}, s) return context.WithValue(ctx, licenseScannerKey{}, s)
} }
func ContextLicenseScanner(ctx context.Context) Scanner { func ContextLicenseScanner(ctx context.Context) (Scanner, error) {
if s, ok := ctx.Value(licenseScannerKey{}).(Scanner); ok { if s, ok := ctx.Value(licenseScannerKey{}).(Scanner); ok {
return s return s, nil
} }
return NewDefaultScanner() return NewDefaultScanner()
} }

View File

@ -2,66 +2,84 @@ package licenses
import ( import (
"context" "context"
"fmt"
"io" "io"
"github.com/google/licensecheck" "github.com/google/licensecheck"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
) )
const coverageThreshold = 75 // determined by experimentation const (
DefaultCoverageThreshold = 75 // determined by experimentation
DefaultIncludeLicenseContent = false
)
type Scanner interface { type Scanner interface {
IdentifyLicenseIDs(context.Context, io.Reader) ([]string, []byte, error) IdentifyLicenseIDs(context.Context, io.Reader) ([]string, []byte, error)
FileSearch(context.Context, file.LocationReadCloser) ([]file.License, error)
PkgSearch(context.Context, file.LocationReadCloser) ([]pkg.License, error)
} }
var _ Scanner = (*scanner)(nil) var _ Scanner = (*scanner)(nil)
type scanner struct { type scanner struct {
coverageThreshold float64 // between 0 and 100 coverageThreshold float64 // between 0 and 100
scanner func([]byte) licensecheck.Coverage includeLicenseContent bool
scanner func([]byte) licensecheck.Coverage
}
// ScannerConfig carries the settings NewScanner uses to build a license Scanner.
type ScannerConfig struct {
// CoverageThreshold is the minimum licensecheck coverage percentage (the
// scanner documents its own threshold as between 0 and 100). When a scan's
// coverage is below it, IdentifyLicenseIDs returns the raw content instead
// of license IDs.
CoverageThreshold float64
// IncludeLicenseContent controls whether the raw license text is attached
// to licenses that could not be identified.
IncludeLicenseContent bool
// Scanner computes licensecheck coverage for the given bytes.
// NOTE(review): NewScanner appears to copy this field as-is; with a nil
// Scanner, IdentifyLicenseIDs returns no IDs, no content, and no error.
Scanner func([]byte) licensecheck.Coverage
}
// Option mutates a scanner while it is being constructed. NewDefaultScanner
// applies the supplied options, in order, after setting its defaults.
type Option func(*scanner)
// WithCoverage returns an Option that overrides the scanner's coverage
// threshold with the given value. The value is stored as provided.
func WithCoverage(coverage float64) Option {
	apply := func(target *scanner) {
		target.coverageThreshold = coverage
	}
	return apply
}
func WithIncludeLicenseContent(includeLicenseContent bool) Option {
return func(s *scanner) {
s.includeLicenseContent = includeLicenseContent
}
} }
// NewDefaultScanner returns a scanner that uses a new instance of the default licensecheck package scanner. // NewDefaultScanner returns a scanner that uses a new instance of the default licensecheck package scanner.
func NewDefaultScanner() Scanner { func NewDefaultScanner(o ...Option) (Scanner, error) {
s, err := licensecheck.NewScanner(licensecheck.BuiltinLicenses()) s, err := licensecheck.NewScanner(licensecheck.BuiltinLicenses())
if err != nil { if err != nil {
log.WithFields("error", err).Trace("unable to create default license scanner") log.WithFields("error", err).Trace("unable to create default license scanner")
s = nil return nil, fmt.Errorf("unable to create default license scanner: %w", err)
} }
newScanner := &scanner{
coverageThreshold: DefaultCoverageThreshold,
includeLicenseContent: DefaultIncludeLicenseContent,
scanner: s.Scan,
}
for _, opt := range o {
opt(newScanner)
}
return newScanner, nil
}
// NewScanner generates a license Scanner with the given ScannerConfig
// if config is nil NewDefaultScanner is used
func NewScanner(c *ScannerConfig) (Scanner, error) {
if c == nil {
return NewDefaultScanner()
}
return &scanner{ return &scanner{
coverageThreshold: coverageThreshold, coverageThreshold: c.CoverageThreshold,
scanner: s.Scan, includeLicenseContent: c.IncludeLicenseContent,
} scanner: c.Scanner,
} }, nil
func NewScanner(scan func([]byte) licensecheck.Coverage, coverage float64) Scanner {
return scanner{
coverageThreshold: coverage,
scanner: scan,
}
}
func (s scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]string, []byte, error) {
if s.scanner == nil {
return nil, nil, nil
}
content, err := io.ReadAll(reader)
if err != nil {
return nil, nil, err
}
cov := s.scanner(content)
if cov.Percent < s.coverageThreshold {
// unknown or no licenses here?
// => return binary content
return nil, content, nil
}
var ids []string
for _, m := range cov.Match {
ids = append(ids, m.ID)
}
return ids, nil, nil
} }

View File

@ -27,11 +27,11 @@ func TestIdentifyLicenseIDs(t *testing.T) {
expected: expectation{ expected: expectation{
yieldError: false, yieldError: false,
ids: []string{"Apache-2.0"}, ids: []string{"Apache-2.0"},
content: []byte{}, content: nil,
}, },
}, },
{ {
name: "custom license", name: "custom license includes content for IdentifyLicenseIDs",
in: "test-fixtures/nvidia-software-and-cuda-supplement", in: "test-fixtures/nvidia-software-and-cuda-supplement",
expected: expectation{ expected: expectation{
yieldError: false, yieldError: false,
@ -45,7 +45,7 @@ func TestIdentifyLicenseIDs(t *testing.T) {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
content, err := os.ReadFile(test.in) content, err := os.ReadFile(test.in)
require.NoError(t, err) require.NoError(t, err)
ids, content, err := testScanner().IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) ids, content, err := testScanner(false).IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content))
if test.expected.yieldError { if test.expected.yieldError {
require.Error(t, err) require.Error(t, err)
} else { } else {
@ -66,10 +66,11 @@ func TestIdentifyLicenseIDs(t *testing.T) {
} }
} }
func testScanner() Scanner { func testScanner(includeLicenseContent bool) Scanner {
return &scanner{ return &scanner{
coverageThreshold: coverageThreshold, coverageThreshold: DefaultCoverageThreshold,
scanner: licensecheck.Scan, includeLicenseContent: includeLicenseContent,
scanner: licensecheck.Scan,
} }
} }

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"crypto/sha256" "crypto/sha256"
"fmt" "fmt"
"io"
"strings" "strings"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
@ -21,11 +22,35 @@ func getCustomLicenseContentHash(contents []byte) string {
return fmt.Sprintf("%x", hash[:]) return fmt.Sprintf("%x", hash[:])
} }
// Search scans the contents of a license file to attempt to determine the type of license it is func (s *scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]string, []byte, error) {
func Search(ctx context.Context, scanner Scanner, reader file.LocationReadCloser) (licenses []pkg.License, err error) { if s.scanner == nil {
return nil, nil, nil
}
content, err := io.ReadAll(reader)
if err != nil {
return nil, nil, err
}
cov := s.scanner(content)
if cov.Percent < s.coverageThreshold {
// unknown or no licenses here
// => check return content to Search to process
return nil, content, nil
}
var ids []string
for _, m := range cov.Match {
ids = append(ids, m.ID)
}
return ids, nil, nil
}
// PkgSearch scans the contents of a license file to attempt to determine the type of license it is
func (s *scanner) PkgSearch(ctx context.Context, reader file.LocationReadCloser) (licenses []pkg.License, err error) {
licenses = make([]pkg.License, 0) licenses = make([]pkg.License, 0)
ids, content, err := scanner.IdentifyLicenseIDs(ctx, reader) ids, content, err := s.IdentifyLicenseIDs(ctx, reader)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -48,7 +73,47 @@ func Search(ctx context.Context, scanner Scanner, reader file.LocationReadCloser
lic := pkg.NewLicenseFromLocations(unknownLicenseType, reader.Location) lic := pkg.NewLicenseFromLocations(unknownLicenseType, reader.Location)
lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content)
lic.Contents = string(content) if s.includeLicenseContent {
lic.Contents = string(content)
}
lic.Type = license.Declared
licenses = append(licenses, lic)
}
return licenses, nil
}
// FileSearch scans the contents of a license file to attempt to determine the type of license it is
func (s *scanner) FileSearch(ctx context.Context, reader file.LocationReadCloser) (licenses []file.License, err error) {
licenses = make([]file.License, 0)
ids, content, err := s.IdentifyLicenseIDs(ctx, reader)
if err != nil {
return nil, err
}
// IdentifyLicenseIDs returns either a list of license IDs or the raw content, never both.
// When the scan's coverage is below the scanner's configured threshold (75% by default),
// no IDs are returned and the raw content is used below to build an "unknown" license.
if len(ids) > 0 {
for _, id := range ids {
lic := file.NewLicense(id)
lic.Type = license.Concluded
licenses = append(licenses, lic)
}
} else if len(content) > 0 {
// harmonize line endings to unix compatible first:
// 1. \r\n => \n (Windows => UNIX)
// 2. \r => \n (Macintosh => UNIX)
content = []byte(strings.ReplaceAll(strings.ReplaceAll(string(content), "\r\n", "\n"), "\r", "\n"))
lic := file.NewLicense(unknownLicenseType)
lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content)
if s.includeLicenseContent {
lic.Contents = string(content)
}
lic.Type = license.Declared lic.Type = license.Declared
licenses = append(licenses, lic) licenses = append(licenses, lic)

View File

@ -27,47 +27,28 @@ func newBytesReadCloser(data []byte) *bytesReadCloser {
} }
} }
func TestSearch(t *testing.T) { func TestSearchFileLicenses(t *testing.T) {
type expectation struct { type expectation struct {
yieldError bool yieldError bool
licenses []pkg.License licenses []file.License
} }
testLocation := file.NewLocation("LICENSE")
tests := []struct { tests := []struct {
name string name string
in string in string
expected expectation includeUnkownLicenseContent bool
expected expectation
}{ }{
{ {
name: "apache license 2.0", name: "apache license 2.0",
in: "test-fixtures/apache-license-2.0", in: "test-fixtures/apache-license-2.0",
expected: expectation{ expected: expectation{
yieldError: false, yieldError: false,
licenses: []pkg.License{ licenses: []file.License{
{ {
Value: "Apache-2.0", Value: "Apache-2.0",
SPDXExpression: "Apache-2.0", SPDXExpression: "Apache-2.0",
Type: "concluded", Type: "concluded",
URLs: nil,
Locations: file.NewLocationSet(testLocation),
Contents: "",
},
},
},
},
{
name: "custom license",
in: "test-fixtures/nvidia-software-and-cuda-supplement",
expected: expectation{
yieldError: false,
licenses: []pkg.License{
{
Value: "UNKNOWN",
SPDXExpression: "UNKNOWN_eebcea3ab1d1a28e671de90119ffcfb35fe86951e4af1b17af52b7a82fcf7d0a",
Type: "declared",
URLs: nil,
Locations: file.NewLocationSet(testLocation),
Contents: string(mustOpen("test-fixtures/nvidia-software-and-cuda-supplement")),
}, },
}, },
}, },
@ -76,9 +57,11 @@ func TestSearch(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
ctx := context.TODO()
content, err := os.ReadFile(test.in) content, err := os.ReadFile(test.in)
require.NoError(t, err) require.NoError(t, err)
result, err := Search(context.TODO(), testScanner(), file.NewLocationReadCloser(file.NewLocation("LICENSE"), io.NopCloser(bytes.NewReader(content)))) s := testScanner(false)
result, err := s.FileSearch(ctx, file.NewLocationReadCloser(file.NewLocation("LICENSE"), io.NopCloser(bytes.NewReader(content))))
if test.expected.yieldError { if test.expected.yieldError {
require.Error(t, err) require.Error(t, err)
} else { } else {
@ -93,3 +76,91 @@ func TestSearch(t *testing.T) {
}) })
} }
} }
// TestSearchPkgLicenses exercises scanner.PkgSearch against license fixtures and
// checks the resulting pkg.License values, both with and without inclusion of
// the raw content for licenses that cannot be identified.
func TestSearchPkgLicenses(t *testing.T) {
	type expectation struct {
		// wantErr asserts on the error returned by PkgSearch; nil means "no error expected".
		wantErr  require.ErrorAssertionFunc
		licenses []pkg.License
	}
	testLocation := file.NewLocation("LICENSE")
	tests := []struct {
		name                        string
		in                          string
		includeUnkownLicenseContent bool
		expected                    expectation
	}{
		{
			name: "apache license 2.0",
			in:   "test-fixtures/apache-license-2.0",
			expected: expectation{
				licenses: []pkg.License{
					{
						Value:          "Apache-2.0",
						SPDXExpression: "Apache-2.0",
						Type:           "concluded",
						URLs:           nil,
						Locations:      file.NewLocationSet(testLocation),
						Contents:       "",
					},
				},
			},
		},
		{
			name: "custom license no content by default",
			in:   "test-fixtures/nvidia-software-and-cuda-supplement",
			expected: expectation{
				licenses: []pkg.License{
					{
						Value:          "UNKNOWN",
						SPDXExpression: "UNKNOWN_eebcea3ab1d1a28e671de90119ffcfb35fe86951e4af1b17af52b7a82fcf7d0a",
						Type:           "declared",
						URLs:           nil,
						Locations:      file.NewLocationSet(testLocation),
						Contents:       "",
					},
				},
			},
		},
		{
			name:                        "custom license with content when scanner has content config",
			in:                          "test-fixtures/nvidia-software-and-cuda-supplement",
			includeUnkownLicenseContent: true,
			expected: expectation{
				licenses: []pkg.License{
					{
						Value:          "UNKNOWN",
						SPDXExpression: "UNKNOWN_eebcea3ab1d1a28e671de90119ffcfb35fe86951e4af1b17af52b7a82fcf7d0a",
						Type:           "declared",
						URLs:           nil,
						Locations:      file.NewLocationSet(testLocation),
						Contents:       string(mustOpen("test-fixtures/nvidia-software-and-cuda-supplement")),
					},
				},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// Default to "no error" so a case only sets wantErr when it expects a failure.
			wantErr := test.expected.wantErr
			if wantErr == nil {
				wantErr = require.NoError
			}

			ctx := context.TODO()
			content, err := os.ReadFile(test.in)
			require.NoError(t, err)

			s := testScanner(test.includeUnkownLicenseContent)
			result, err := s.PkgSearch(ctx, file.NewLocationReadCloser(file.NewLocation("LICENSE"), io.NopCloser(bytes.NewReader(content))))
			wantErr(t, err)
			if err != nil {
				// The error was expected (wantErr passed); there is no result to compare.
				return
			}

			require.Len(t, result, len(test.expected.licenses))
			if len(test.expected.licenses) > 0 {
				require.Equal(t, test.expected.licenses, result)
			}
		})
	}
}

View File

@ -11,6 +11,7 @@ type CatalogingFactoryConfig struct {
SearchConfig cataloging.SearchConfig SearchConfig cataloging.SearchConfig
RelationshipsConfig cataloging.RelationshipsConfig RelationshipsConfig cataloging.RelationshipsConfig
DataGenerationConfig cataloging.DataGenerationConfig DataGenerationConfig cataloging.DataGenerationConfig
LicenseConfig cataloging.LicenseConfig
PackagesConfig pkgcataloging.Config PackagesConfig pkgcataloging.Config
FilesConfig filecataloging.Config FilesConfig filecataloging.Config
} }
@ -21,6 +22,7 @@ func DefaultCatalogingFactoryConfig() CatalogingFactoryConfig {
SearchConfig: cataloging.DefaultSearchConfig(), SearchConfig: cataloging.DefaultSearchConfig(),
RelationshipsConfig: cataloging.DefaultRelationshipsConfig(), RelationshipsConfig: cataloging.DefaultRelationshipsConfig(),
DataGenerationConfig: cataloging.DefaultDataGenerationConfig(), DataGenerationConfig: cataloging.DefaultDataGenerationConfig(),
LicenseConfig: cataloging.DefaultLicenseConfig(),
PackagesConfig: pkgcataloging.DefaultConfig(), PackagesConfig: pkgcataloging.DefaultConfig(),
FilesConfig: filecataloging.DefaultConfig(), FilesConfig: filecataloging.DefaultConfig(),
} }

View File

@ -0,0 +1,15 @@
package cataloging
import "github.com/anchore/syft/internal/licenses"
// LicenseConfig holds the license-detection settings applied during cataloging.
type LicenseConfig struct {
// IncludeUnkownLicenseContent controls whether the raw text of a license is
// kept when the license could not be identified.
// NOTE(review): the identifier is spelled "Unkown" while the serialized key
// is "include-unknown-license-content"; the field is exported, so renaming
// it would be a breaking change.
IncludeUnkownLicenseContent bool `json:"include-unknown-license-content" yaml:"include-unknown-license-content" mapstructure:"include-unknown-license-content"`
// Coverage is the coverage threshold, as a percentage, handed to the
// license scanner.
Coverage float64 `json:"coverage" yaml:"coverage" mapstructure:"coverage"`
}
// DefaultLicenseConfig returns a LicenseConfig populated from the defaults
// declared in the internal licenses package.
func DefaultLicenseConfig() LicenseConfig {
	var cfg LicenseConfig
	cfg.IncludeUnkownLicenseContent = licenses.DefaultIncludeLicenseContent
	cfg.Coverage = licenses.DefaultCoverageThreshold
	return cfg
}

View File

@ -16,6 +16,7 @@ type configurationAuditTrail struct {
DataGeneration cataloging.DataGenerationConfig `json:"data-generation" yaml:"data-generation" mapstructure:"data-generation"` DataGeneration cataloging.DataGenerationConfig `json:"data-generation" yaml:"data-generation" mapstructure:"data-generation"`
Packages pkgcataloging.Config `json:"packages" yaml:"packages" mapstructure:"packages"` Packages pkgcataloging.Config `json:"packages" yaml:"packages" mapstructure:"packages"`
Files filecataloging.Config `json:"files" yaml:"files" mapstructure:"files"` Files filecataloging.Config `json:"files" yaml:"files" mapstructure:"files"`
Licenses cataloging.LicenseConfig `json:"licenses" yaml:"licenses" mapstructure:"licenses"`
Catalogers catalogerManifest `json:"catalogers" yaml:"catalogers" mapstructure:"catalogers"` Catalogers catalogerManifest `json:"catalogers" yaml:"catalogers" mapstructure:"catalogers"`
ExtraConfigs any `json:"extra,omitempty" yaml:"extra" mapstructure:"extra"` ExtraConfigs any `json:"extra,omitempty" yaml:"extra" mapstructure:"extra"`
} }

View File

@ -52,6 +52,7 @@ func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) (
DataGeneration: cfg.DataGeneration, DataGeneration: cfg.DataGeneration,
Packages: cfg.Packages, Packages: cfg.Packages,
Files: cfg.Files, Files: cfg.Files,
Licenses: cfg.Licenses,
Catalogers: *audit, Catalogers: *audit,
ExtraConfigs: cfg.ToolConfiguration, ExtraConfigs: cfg.ToolConfiguration,
}, },
@ -61,8 +62,15 @@ func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) (
}, },
} }
// inject a single license scanner for all package cataloging tasks into context // inject a single license scanner and content config for all package cataloging tasks into context
ctx = licenses.SetContextLicenseScanner(ctx, licenses.NewDefaultScanner()) licenseScanner, err := licenses.NewDefaultScanner(
licenses.WithIncludeLicenseContent(cfg.Licenses.IncludeUnkownLicenseContent),
licenses.WithCoverage(cfg.Licenses.Coverage),
)
if err != nil {
return nil, fmt.Errorf("could not build licenseScanner for cataloging: %w", err)
}
ctx = licenses.SetContextLicenseScanner(ctx, licenseScanner)
catalogingProgress := monitorCatalogingTask(src.ID(), taskGroups) catalogingProgress := monitorCatalogingTask(src.ID(), taskGroups)
packageCatalogingProgress := monitorPackageCatalogingTask() packageCatalogingProgress := monitorPackageCatalogingTask()

View File

@ -26,6 +26,7 @@ type CreateSBOMConfig struct {
Unknowns cataloging.UnknownsConfig Unknowns cataloging.UnknownsConfig
DataGeneration cataloging.DataGenerationConfig DataGeneration cataloging.DataGenerationConfig
Packages pkgcataloging.Config Packages pkgcataloging.Config
Licenses cataloging.LicenseConfig
Files filecataloging.Config Files filecataloging.Config
Parallelism int Parallelism int
CatalogerSelection cataloging.SelectionRequest CatalogerSelection cataloging.SelectionRequest
@ -46,6 +47,7 @@ func DefaultCreateSBOMConfig() *CreateSBOMConfig {
Relationships: cataloging.DefaultRelationshipsConfig(), Relationships: cataloging.DefaultRelationshipsConfig(),
DataGeneration: cataloging.DefaultDataGenerationConfig(), DataGeneration: cataloging.DefaultDataGenerationConfig(),
Packages: pkgcataloging.DefaultConfig(), Packages: pkgcataloging.DefaultConfig(),
Licenses: cataloging.DefaultLicenseConfig(),
Files: filecataloging.DefaultConfig(), Files: filecataloging.DefaultConfig(),
Parallelism: 1, Parallelism: 1,
packageTaskFactories: task.DefaultPackageTaskFactories(), packageTaskFactories: task.DefaultPackageTaskFactories(),
@ -134,6 +136,12 @@ func (c *CreateSBOMConfig) WithPackagesConfig(cfg pkgcataloging.Config) *CreateS
return c return c
} }
// WithLicenseConfig sets the license configuration (unknown-license content
// inclusion and coverage threshold) on the config and returns the receiver so
// calls can be chained.
func (c *CreateSBOMConfig) WithLicenseConfig(cfg cataloging.LicenseConfig) *CreateSBOMConfig {
c.Licenses = cfg
return c
}
// WithFilesConfig allows for defining file-based cataloging parameters. // WithFilesConfig allows for defining file-based cataloging parameters.
func (c *CreateSBOMConfig) WithFilesConfig(cfg filecataloging.Config) *CreateSBOMConfig { func (c *CreateSBOMConfig) WithFilesConfig(cfg filecataloging.Config) *CreateSBOMConfig {
c.Files = cfg c.Files = cfg

View File

@ -10,6 +10,7 @@ type License struct {
SPDXExpression string SPDXExpression string
Type license.Type Type license.Type
LicenseEvidence *LicenseEvidence // evidence from license classifier LicenseEvidence *LicenseEvidence // evidence from license classifier
Contents string `hash:"ignore"`
} }
type LicenseEvidence struct { type LicenseEvidence struct {

View File

@ -214,7 +214,7 @@ func (c *goLicenseResolver) findLicensesInFS(ctx context.Context, scanner licens
} }
defer internal.CloseAndLogError(rdr, filePath) defer internal.CloseAndLogError(rdr, filePath)
parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(file.NewLocation(filePath), rdr)) parsed, err := scanner.PkgSearch(ctx, file.NewLocationReadCloser(file.NewLocation(filePath), rdr))
if err != nil { if err != nil {
log.Debugf("error parsing license file %s: %v", filePath, err) log.Debugf("error parsing license file %s: %v", filePath, err)
return nil return nil
@ -267,7 +267,7 @@ func (c *goLicenseResolver) parseLicenseFromLocation(ctx context.Context, scanne
return nil, err return nil, err
} }
defer internal.CloseAndLogError(contents, l.RealPath) defer internal.CloseAndLogError(contents, l.RealPath)
parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(l, contents)) parsed, err := scanner.PkgSearch(ctx, file.NewLocationReadCloser(l, contents))
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -71,7 +71,12 @@ func Test_LicenseSearch(t *testing.T) {
localVendorDir := filepath.Join(wd, "test-fixtures", "licenses-vendor") localVendorDir := filepath.Join(wd, "test-fixtures", "licenses-vendor")
licenseScanner := licenses.NewScanner(licensecheck.Scan, float64(75)) sc := &licenses.ScannerConfig{
CoverageThreshold: 75,
Scanner: licensecheck.Scan,
}
licenseScanner, err := licenses.NewScanner(sc)
require.NoError(t, err)
tests := []struct { tests := []struct {
name string name string
@ -296,7 +301,10 @@ func Test_findVersionPath(t *testing.T) {
func Test_walkDirErrors(t *testing.T) { func Test_walkDirErrors(t *testing.T) {
resolver := newGoLicenseResolver("", CatalogerConfig{}) resolver := newGoLicenseResolver("", CatalogerConfig{})
_, err := resolver.findLicensesInFS(context.Background(), licenses.NewScanner(licensecheck.Scan, float64(75)), "somewhere", badFS{}) sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75}
scanner, err := licenses.NewScanner(sc)
require.NoError(t, err)
_, err = resolver.findLicensesInFS(context.Background(), scanner, "somewhere", badFS{})
require.Error(t, err) require.Error(t, err)
} }
@ -314,7 +322,9 @@ func Test_noLocalGoModDir(t *testing.T) {
validTmp := t.TempDir() validTmp := t.TempDir()
require.NoError(t, os.MkdirAll(filepath.Join(validTmp, "mod@ver"), 0700|os.ModeDir)) require.NoError(t, os.MkdirAll(filepath.Join(validTmp, "mod@ver"), 0700|os.ModeDir))
licenseScanner := licenses.NewScanner(licensecheck.Scan, float64(75)) sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75}
licenseScanner, err := licenses.NewScanner(sc)
require.NoError(t, err)
tests := []struct { tests := []struct {
name string name string
dir string dir string

View File

@ -63,7 +63,10 @@ func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package var pkgs []pkg.Package
licenseScanner := licenses.ContextLicenseScanner(ctx) licenseScanner, err := licenses.ContextLicenseScanner(ctx)
if err != nil {
return nil, nil, err
}
unionReader, err := unionreader.GetUnionReader(reader.ReadCloser) unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
if err != nil { if err != nil {

View File

@ -170,7 +170,9 @@ func TestBuildGoPkgInfo(t *testing.T) {
}, },
} }
licenseScanner := licenses.NewScanner(licensecheck.Scan, float64(75)) sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75}
licenseScanner, err := licenses.NewScanner(sc)
require.NoError(t, err)
tests := []struct { tests := []struct {
name string name string

View File

@ -35,7 +35,10 @@ func newGoModCataloger(opts CatalogerConfig) *goModCataloger {
func (c *goModCataloger) parseGoModFile(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (c *goModCataloger) parseGoModFile(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
packages := make(map[string]pkg.Package) packages := make(map[string]pkg.Package)
licenseScanner := licenses.ContextLicenseScanner(ctx) licenseScanner, err := licenses.ContextLicenseScanner(ctx)
if err != nil {
return nil, nil, fmt.Errorf("unable to create default license scanner: %w", err)
}
contents, err := io.ReadAll(reader) contents, err := io.ReadAll(reader)
if err != nil { if err != nil {

View File

@ -96,7 +96,10 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {
// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
// and parse nested archives or ignore them. // and parse nested archives or ignore them.
func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) { func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
licenseScanner := licenses.ContextLicenseScanner(ctx) licenseScanner, err := licenses.ContextLicenseScanner(ctx)
if err != nil {
return nil, nil, fmt.Errorf("could not build license scanner for java archive parser: %w", err)
}
// fetch the last element of the virtual path // fetch the last element of the virtual path
virtualElements := strings.Split(reader.Path(), ":") virtualElements := strings.Split(reader.Path(), ":")
@ -506,7 +509,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
for _, licenseMatch := range licenseMatches { for _, licenseMatch := range licenseMatches {
licenseContents := contents[licenseMatch] licenseContents := contents[licenseMatch]
r := strings.NewReader(licenseContents) r := strings.NewReader(licenseContents)
parsed, err := licenses.Search(ctx, j.licenseScanner, file.NewLocationReadCloser(j.location, io.NopCloser(r))) parsed, err := j.licenseScanner.PkgSearch(ctx, file.NewLocationReadCloser(j.location, io.NopCloser(r)))
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -32,8 +32,10 @@ import (
func TestSearchMavenForLicenses(t *testing.T) { func TestSearchMavenForLicenses(t *testing.T) {
url := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo") url := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo")
sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75}
ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.NewScanner(licensecheck.Scan, float64(75))) scanner, err := licenses.NewScanner(sc)
require.NoError(t, err)
ctx := licenses.SetContextLicenseScanner(context.Background(), scanner)
tests := []struct { tests := []struct {
name string name string
@ -92,7 +94,10 @@ func TestSearchMavenForLicenses(t *testing.T) {
} }
func TestParseJar(t *testing.T) { func TestParseJar(t *testing.T) {
ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.NewScanner(licensecheck.Scan, float64(75))) sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75}
scanner, err := licenses.NewScanner(sc)
require.NoError(t, err)
ctx := licenses.SetContextLicenseScanner(context.Background(), scanner)
tests := []struct { tests := []struct {
name string name string
@ -1375,7 +1380,10 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
} }
func Test_deterministicMatchingPomProperties(t *testing.T) { func Test_deterministicMatchingPomProperties(t *testing.T) {
ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.NewScanner(licensecheck.Scan, float64(75))) sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75}
scanner, err := licenses.NewScanner(sc)
require.NoError(t, err)
ctx := licenses.SetContextLicenseScanner(context.Background(), scanner)
tests := []struct { tests := []struct {
fixture string fixture string

View File

@ -109,7 +109,7 @@ func findLicenses(ctx context.Context, scanner licenses.Scanner, resolver file.R
if len(found) > 0 { if len(found) > 0 {
metadataContents, err := resolver.FileContentsByLocation(found[0]) metadataContents, err := resolver.FileContentsByLocation(found[0])
if err == nil { if err == nil {
parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(m.LicenseLocation, metadataContents)) parsed, err := scanner.PkgSearch(ctx, file.NewLocationReadCloser(m.LicenseLocation, metadataContents))
if err != nil { if err != nil {
log.WithFields("error", err).Tracef("unable to parse a license from the file in %s", m.LicenseLocation.Path()) log.WithFields("error", err).Tracef("unable to parse a license from the file in %s", m.LicenseLocation.Path())
} }

View File

@ -20,8 +20,10 @@ import (
// parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents. Contained // parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents. Contained
// fields are governed by the PyPA core metadata specification (https://packaging.python.org/en/latest/specifications/core-metadata/). // fields are governed by the PyPA core metadata specification (https://packaging.python.org/en/latest/specifications/core-metadata/).
func parseWheelOrEgg(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func parseWheelOrEgg(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
licenseScanner := licenses.ContextLicenseScanner(ctx) licenseScanner, err := licenses.ContextLicenseScanner(ctx)
if err != nil {
return nil, nil, err
}
pd, sources, err := assembleEggOrWheelMetadata(resolver, reader.Location) pd, sources, err := assembleEggOrWheelMetadata(resolver, reader.Location)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err

45
test/cli/license_test.go Normal file
View File

@ -0,0 +1,45 @@
package cli
import "testing"
// Test_Licenses runs `syft scan` against the "image-unknown-licenses" fixture
// image and checks the JSON report for license content, once with the default
// configuration and once with SYFT_LICENSE_INCLUDE_UNKNOWN_LICENSE_CONTENT
// set to "true".
func Test_Licenses(t *testing.T) {
	testImage := getFixtureImage(t, "image-unknown-licenses")
	tests := []struct {
		name       string
		args       []string
		env        map[string]string
		assertions []traitAssertion
	}{
		{
			name: "licenses default with no content",
			args: []string{"scan", "-o", "json", testImage, "--from", "docker-archive"},
			env:  map[string]string{"SYFT_FORMAT_PRETTY": "true"},
			assertions: []traitAssertion{
				assertJsonReport,
				assertUnknownLicenseContent(false),
				assertSuccessfulReturnCode,
			},
		},
		{
			name: "licenses with content",
			args: []string{"scan", "-o", "json", testImage, "--from", "docker-archive"},
			env:  map[string]string{"SYFT_FORMAT_PRETTY": "true", "SYFT_LICENSE_INCLUDE_UNKNOWN_LICENSE_CONTENT": "true"},
			assertions: []traitAssertion{
				assertJsonReport,
				assertUnknownLicenseContent(true),
				assertSuccessfulReturnCode,
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			cmd, stdout, stderr := runSyft(t, test.env, test.args...)
			// Deferred so the command output is still reported when a trait
			// assertion stops the subtest early (FailNow exits the goroutine,
			// but deferred calls still run).
			defer logOutputOnFailure(t, cmd, stdout, stderr)

			for _, traitFn := range test.assertions {
				traitFn(t, stdout, stderr, cmd.ProcessState.ExitCode())
			}
		})
	}
}

View File

@ -0,0 +1,3 @@
# Base image is alpine, pinned by digest rather than by tag.
FROM alpine@sha256:c5c5fda71656f28e49ac9c5416b3643eaa6a108a8093151d6d1afc9463be8e33
# Delete the apk "installed" database file from the image.
# NOTE(review): presumably so the base image's own apk packages are not reported — confirm.
RUN rm -rf /lib/apk/db/installed
# Copy the build context (the fixture directory contents) into /home/files.
COPY . /home/files

View File

@ -10,7 +10,10 @@ import (
"testing" "testing"
"github.com/acarl005/stripansi" "github.com/acarl005/stripansi"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/format/syftjson/model"
) )
type traitAssertion func(tb testing.TB, stdout, stderr string, rc int) type traitAssertion func(tb testing.TB, stdout, stderr string, rc int)
@ -148,6 +151,34 @@ func assertPackageCount(length uint) traitAssertion {
} }
} }
// assertUnknownLicenseContent returns a trait assertion that decodes stdout as
// a JSON report and inspects the licenses of every entry under "artifacts".
//
// When required is true, each license whose SPDX expression contains "UNKNOWN"
// must carry non-empty contents. Every other license (and every license when
// required is false) must have empty contents.
func assertUnknownLicenseContent(required bool) traitAssertion {
	return func(tb testing.TB, stdout, _ string, _ int) {
		tb.Helper()

		// Only the fields needed for this check are decoded from the report.
		type nameAndLicense struct {
			Name     string          `json:"name"`
			Licenses []model.License `json:"licenses"`
		}
		type partial struct {
			Artifacts []nameAndLicense `json:"artifacts"`
		}

		var data partial
		if err := json.Unmarshal([]byte(stdout), &data); err != nil {
			tb.Errorf("expected to find a JSON report, but was unmarshalable: %+v", err)
			// Nothing meaningful can be checked on an undecodable report.
			return
		}

		for _, artifact := range data.Artifacts {
			for _, lic := range artifact.Licenses {
				if required && strings.Contains(lic.SPDXExpression, "UNKNOWN") {
					assert.NotEmpty(tb, lic.Contents, "package %q: expected contents for license %q", artifact.Name, lic.SPDXExpression)
					continue
				}
				assert.Empty(tb, lic.Contents, "package %q: expected no contents for license %q", artifact.Name, lic.SPDXExpression)
			}
		}
	}
}
func assertFailingReturnCode(tb testing.TB, _, _ string, rc int) { func assertFailingReturnCode(tb testing.TB, _, _ string, rc int) {
tb.Helper() tb.Helper()
if rc == 0 { if rc == 0 {