diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index e59047cb2..870b02507 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -33,6 +33,7 @@ type Catalog struct { DefaultCatalogers []string `yaml:"default-catalogers" json:"default-catalogers" mapstructure:"default-catalogers"` SelectCatalogers []string `yaml:"select-catalogers" json:"select-catalogers" mapstructure:"select-catalogers"` Package packageConfig `yaml:"package" json:"package" mapstructure:"package"` + License licenseConfig `yaml:"license" json:"license" mapstructure:"license"` File fileConfig `yaml:"file" json:"file" mapstructure:"file"` Scope string `yaml:"scope" json:"scope" mapstructure:"scope"` Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel @@ -69,6 +70,7 @@ func DefaultCatalog() Catalog { Compliance: defaultComplianceConfig(), Scope: source.SquashedScope.String(), Package: defaultPackageConfig(), + License: defaultLicenseConfig(), LinuxKernel: defaultLinuxKernelConfig(), Golang: defaultGolangConfig(), Java: defaultJavaConfig(), @@ -89,6 +91,7 @@ func (cfg Catalog) ToSBOMConfig(id clio.Identification) *syft.CreateSBOMConfig { WithUnknownsConfig(cfg.ToUnknownsConfig()). WithSearchConfig(cfg.ToSearchConfig()). WithPackagesConfig(cfg.ToPackagesConfig()). + WithLicenseConfig(cfg.ToLicenseConfig()). WithFilesConfig(cfg.ToFilesConfig()). WithCatalogerSelection( cataloging.NewSelectionRequest(). 
@@ -146,6 +149,13 @@ func (cfg Catalog) ToFilesConfig() filecataloging.Config { } } +func (cfg Catalog) ToLicenseConfig() cataloging.LicenseConfig { + return cataloging.LicenseConfig{ + IncludeUnkownLicenseContent: cfg.License.IncludeUnknownLicenseContent, + Coverage: cfg.License.LicenseCoverage, + } +} + func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config { archiveSearch := cataloging.ArchiveSearchConfig{ IncludeIndexedArchives: cfg.Package.SearchIndexedArchives, diff --git a/cmd/syft/internal/options/license.go b/cmd/syft/internal/options/license.go new file mode 100644 index 000000000..1cce6c298 --- /dev/null +++ b/cmd/syft/internal/options/license.go @@ -0,0 +1,28 @@ +package options + +import ( + "github.com/anchore/clio" +) + +type licenseConfig struct { + IncludeUnknownLicenseContent bool `yaml:"include-unknown-license-content" json:"include-unknown-license-content" mapstructure:"include-unknown-license-content"` + LicenseCoverage float64 `yaml:"license-coverage" json:"license-coverage" mapstructure:"license-coverage"` +} + +var _ interface { + clio.FieldDescriber +} = (*licenseConfig)(nil) + +func (o *licenseConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { + descriptions.Add(&o.IncludeUnknownLicenseContent, `include the content of a license in the SBOM when syft +cannot determine a valid SPDX ID for the given license`) + descriptions.Add(&o.LicenseCoverage, `adjust the percent as a fraction of the total text, in normalized words, that +matches any valid license for the given inputs, expressed as a percentage across all of the licenses matched.`) +} + +func defaultLicenseConfig() licenseConfig { + return licenseConfig{ + IncludeUnknownLicenseContent: false, + LicenseCoverage: 75, + } +} diff --git a/internal/licenses/context.go b/internal/licenses/context.go index 9a1224dae..9d735a8a3 100644 --- a/internal/licenses/context.go +++ b/internal/licenses/context.go @@ -10,9 +10,9 @@ func SetContextLicenseScanner(ctx context.Context, s 
Scanner) context.Context { return context.WithValue(ctx, licenseScannerKey{}, s) } -func ContextLicenseScanner(ctx context.Context) Scanner { +func ContextLicenseScanner(ctx context.Context) (Scanner, error) { if s, ok := ctx.Value(licenseScannerKey{}).(Scanner); ok { - return s + return s, nil } return NewDefaultScanner() } diff --git a/internal/licenses/scanner.go b/internal/licenses/scanner.go index 6ab7663e0..7728ba26b 100644 --- a/internal/licenses/scanner.go +++ b/internal/licenses/scanner.go @@ -2,66 +2,84 @@ package licenses import ( "context" + "fmt" "io" "github.com/google/licensecheck" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" ) -const coverageThreshold = 75 // determined by experimentation +const ( + DefaultCoverageThreshold = 75 // determined by experimentation + DefaultIncludeLicenseContent = false +) type Scanner interface { IdentifyLicenseIDs(context.Context, io.Reader) ([]string, []byte, error) + FileSearch(context.Context, file.LocationReadCloser) ([]file.License, error) + PkgSearch(context.Context, file.LocationReadCloser) ([]pkg.License, error) } var _ Scanner = (*scanner)(nil) type scanner struct { - coverageThreshold float64 // between 0 and 100 - scanner func([]byte) licensecheck.Coverage + coverageThreshold float64 // between 0 and 100 + includeLicenseContent bool + scanner func([]byte) licensecheck.Coverage +} + +type ScannerConfig struct { + CoverageThreshold float64 + IncludeLicenseContent bool + Scanner func([]byte) licensecheck.Coverage +} + +type Option func(*scanner) + +func WithCoverage(coverage float64) Option { + return func(s *scanner) { + s.coverageThreshold = coverage + } +} + +func WithIncludeLicenseContent(includeLicenseContent bool) Option { + return func(s *scanner) { + s.includeLicenseContent = includeLicenseContent + } } // NewDefaultScanner returns a scanner that uses a new instance of the default licensecheck package scanner. 
-func NewDefaultScanner() Scanner { +func NewDefaultScanner(o ...Option) (Scanner, error) { s, err := licensecheck.NewScanner(licensecheck.BuiltinLicenses()) if err != nil { log.WithFields("error", err).Trace("unable to create default license scanner") - s = nil + return nil, fmt.Errorf("unable to create default license scanner: %w", err) } + newScanner := &scanner{ + coverageThreshold: DefaultCoverageThreshold, + includeLicenseContent: DefaultIncludeLicenseContent, + scanner: s.Scan, + } + + for _, opt := range o { + opt(newScanner) + } + return newScanner, nil +} + +// NewScanner generates a license Scanner with the given ScannerConfig +// if config is nil NewDefaultScanner is used +func NewScanner(c *ScannerConfig) (Scanner, error) { + if c == nil { + return NewDefaultScanner() + } + return &scanner{ - coverageThreshold: coverageThreshold, - scanner: s.Scan, - } -} - -func NewScanner(scan func([]byte) licensecheck.Coverage, coverage float64) Scanner { - return scanner{ - coverageThreshold: coverage, - scanner: scan, - } -} - -func (s scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]string, []byte, error) { - if s.scanner == nil { - return nil, nil, nil - } - - content, err := io.ReadAll(reader) - if err != nil { - return nil, nil, err - } - - cov := s.scanner(content) - if cov.Percent < s.coverageThreshold { - // unknown or no licenses here? 
- // => return binary content - return nil, content, nil - } - - var ids []string - for _, m := range cov.Match { - ids = append(ids, m.ID) - } - return ids, nil, nil + coverageThreshold: c.CoverageThreshold, + includeLicenseContent: c.IncludeLicenseContent, + scanner: c.Scanner, + }, nil } diff --git a/internal/licenses/scanner_test.go b/internal/licenses/scanner_test.go index 923b310d8..090a65464 100644 --- a/internal/licenses/scanner_test.go +++ b/internal/licenses/scanner_test.go @@ -27,11 +27,11 @@ func TestIdentifyLicenseIDs(t *testing.T) { expected: expectation{ yieldError: false, ids: []string{"Apache-2.0"}, - content: []byte{}, + content: nil, }, }, { - name: "custom license", + name: "custom license includes content for IdentifyLicenseIDs", in: "test-fixtures/nvidia-software-and-cuda-supplement", expected: expectation{ yieldError: false, @@ -45,7 +45,7 @@ func TestIdentifyLicenseIDs(t *testing.T) { t.Run(test.name, func(t *testing.T) { content, err := os.ReadFile(test.in) require.NoError(t, err) - ids, content, err := testScanner().IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) + ids, content, err := testScanner(false).IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) if test.expected.yieldError { require.Error(t, err) } else { @@ -66,10 +66,11 @@ func TestIdentifyLicenseIDs(t *testing.T) { } } -func testScanner() Scanner { +func testScanner(includeLicenseContent bool) Scanner { return &scanner{ - coverageThreshold: coverageThreshold, - scanner: licensecheck.Scan, + coverageThreshold: DefaultCoverageThreshold, + includeLicenseContent: includeLicenseContent, + scanner: licensecheck.Scan, } } diff --git a/internal/licenses/search.go b/internal/licenses/search.go index efab2d99c..1ade9b633 100644 --- a/internal/licenses/search.go +++ b/internal/licenses/search.go @@ -4,6 +4,7 @@ import ( "context" "crypto/sha256" "fmt" + "io" "strings" "github.com/anchore/syft/syft/file" @@ -21,11 +22,35 @@ func getCustomLicenseContentHash(contents 
[]byte) string { return fmt.Sprintf("%x", hash[:]) } -// Search scans the contents of a license file to attempt to determine the type of license it is -func Search(ctx context.Context, scanner Scanner, reader file.LocationReadCloser) (licenses []pkg.License, err error) { +func (s *scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]string, []byte, error) { + if s.scanner == nil { + return nil, nil, nil + } + + content, err := io.ReadAll(reader) + if err != nil { + return nil, nil, err + } + + cov := s.scanner(content) + if cov.Percent < s.coverageThreshold { + // unknown or no licenses here + // => return the raw content so that PkgSearch/FileSearch can process it + return nil, content, nil + } + + var ids []string + for _, m := range cov.Match { + ids = append(ids, m.ID) + } + return ids, nil, nil +} + +// PkgSearch scans the contents of a license file to attempt to determine the type of license it is +func (s *scanner) PkgSearch(ctx context.Context, reader file.LocationReadCloser) (licenses []pkg.License, err error) { licenses = make([]pkg.License, 0) - ids, content, err := scanner.IdentifyLicenseIDs(ctx, reader) + ids, content, err := s.IdentifyLicenseIDs(ctx, reader) if err != nil { return nil, err } @@ -48,7 +73,47 @@ func Search(ctx context.Context, scanner Scanner, reader file.LocationReadCloser lic := pkg.NewLicenseFromLocations(unknownLicenseType, reader.Location) lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) - lic.Contents = string(content) + if s.includeLicenseContent { + lic.Contents = string(content) + } + lic.Type = license.Declared + + licenses = append(licenses, lic) + } + + return licenses, nil +} + +// FileSearch scans the contents of a license file to attempt to determine the type of license it is +func (s *scanner) FileSearch(ctx context.Context, reader file.LocationReadCloser) (licenses []file.License, err error) { + licenses = make([]file.License, 0) + + ids, content, err := s.IdentifyLicenseIDs(ctx, reader) + if err
!= nil { + return nil, err + } + + // IdentifyLicenseIDs returns either a list of IDs or the raw content + // These return values are mutually exclusive. + // If the match coverage is below the scanner's configured threshold (75% by default) then we return the full license content + if len(ids) > 0 { + for _, id := range ids { + lic := file.NewLicense(id) + lic.Type = license.Concluded + + licenses = append(licenses, lic) + } + } else if len(content) > 0 { + // harmonize line endings to unix compatible first: + // 1. \r\n => \n (Windows => UNIX) + // 2. \r => \n (Macintosh => UNIX) + content = []byte(strings.ReplaceAll(strings.ReplaceAll(string(content), "\r\n", "\n"), "\r", "\n")) + + lic := file.NewLicense(unknownLicenseType) + lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) + if s.includeLicenseContent { + lic.Contents = string(content) + } lic.Type = license.Declared licenses = append(licenses, lic) diff --git a/internal/licenses/search_test.go b/internal/licenses/search_test.go index 83afec1fd..11138bf49 100644 --- a/internal/licenses/search_test.go +++ b/internal/licenses/search_test.go @@ -27,47 +27,28 @@ func newBytesReadCloser(data []byte) *bytesReadCloser { } } -func TestSearch(t *testing.T) { +func TestSearchFileLicenses(t *testing.T) { type expectation struct { yieldError bool - licenses []pkg.License + licenses []file.License } - testLocation := file.NewLocation("LICENSE") + tests := []struct { - name string - in string - expected expectation + name string + in string + includeUnkownLicenseContent bool + expected expectation }{ { name: "apache license 2.0", in: "test-fixtures/apache-license-2.0", expected: expectation{ yieldError: false, - licenses: []pkg.License{ + licenses: []file.License{ { Value: "Apache-2.0", SPDXExpression: "Apache-2.0", Type: "concluded", - URLs: nil, - Locations: file.NewLocationSet(testLocation), - Contents: "", - }, - }, - }, - }, - { - name: "custom license", - in: "test-fixtures/nvidia-software-and-cuda-supplement", - expected:
expectation{ - yieldError: false, - licenses: []pkg.License{ - { - Value: "UNKNOWN", - SPDXExpression: "UNKNOWN_eebcea3ab1d1a28e671de90119ffcfb35fe86951e4af1b17af52b7a82fcf7d0a", - Type: "declared", - URLs: nil, - Locations: file.NewLocationSet(testLocation), - Contents: string(mustOpen("test-fixtures/nvidia-software-and-cuda-supplement")), }, }, }, @@ -76,9 +57,11 @@ func TestSearch(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { + ctx := context.TODO() content, err := os.ReadFile(test.in) require.NoError(t, err) - result, err := Search(context.TODO(), testScanner(), file.NewLocationReadCloser(file.NewLocation("LICENSE"), io.NopCloser(bytes.NewReader(content)))) + s := testScanner(false) + result, err := s.FileSearch(ctx, file.NewLocationReadCloser(file.NewLocation("LICENSE"), io.NopCloser(bytes.NewReader(content)))) if test.expected.yieldError { require.Error(t, err) } else { @@ -93,3 +76,91 @@ func TestSearch(t *testing.T) { }) } } + +func TestSearchPkgLicenses(t *testing.T) { + type expectation struct { + wantErr require.ErrorAssertionFunc + licenses []pkg.License + } + + testLocation := file.NewLocation("LICENSE") + tests := []struct { + name string + in string + includeUnkownLicenseContent bool + expected expectation + }{ + { + name: "apache license 2.0", + in: "test-fixtures/apache-license-2.0", + expected: expectation{ + licenses: []pkg.License{ + { + Value: "Apache-2.0", + SPDXExpression: "Apache-2.0", + Type: "concluded", + URLs: nil, + Locations: file.NewLocationSet(testLocation), + Contents: "", + }, + }, + wantErr: nil, + }, + }, + { + name: "custom license no content by default", + in: "test-fixtures/nvidia-software-and-cuda-supplement", + expected: expectation{ + licenses: []pkg.License{ + { + Value: "UNKNOWN", + SPDXExpression: "UNKNOWN_eebcea3ab1d1a28e671de90119ffcfb35fe86951e4af1b17af52b7a82fcf7d0a", + Type: "declared", + URLs: nil, + Locations: file.NewLocationSet(testLocation), + Contents: "", + }, + }, + 
wantErr: nil, + }, + }, + { + name: "custom license with content when scanner has content config", + in: "test-fixtures/nvidia-software-and-cuda-supplement", + includeUnkownLicenseContent: true, + expected: expectation{ + licenses: []pkg.License{ + { + Value: "UNKNOWN", + SPDXExpression: "UNKNOWN_eebcea3ab1d1a28e671de90119ffcfb35fe86951e4af1b17af52b7a82fcf7d0a", + Type: "declared", + URLs: nil, + Locations: file.NewLocationSet(testLocation), + Contents: string(mustOpen("test-fixtures/nvidia-software-and-cuda-supplement")), + }, + }, + wantErr: nil, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx := context.TODO() + content, err := os.ReadFile(test.in) + require.NoError(t, err) + s := testScanner(test.includeUnkownLicenseContent) + result, err := s.PkgSearch(ctx, file.NewLocationReadCloser(file.NewLocation("LICENSE"), io.NopCloser(bytes.NewReader(content)))) + if test.expected.wantErr != nil { + test.expected.wantErr(t, err) + } + require.NoError(t, err) + + require.Len(t, result, len(test.expected.licenses)) + + if len(test.expected.licenses) > 0 { + require.Equal(t, test.expected.licenses, result) + } + }) + } +} diff --git a/internal/task/cataloging_config.go b/internal/task/cataloging_config.go index bef9d673c..e59661a30 100644 --- a/internal/task/cataloging_config.go +++ b/internal/task/cataloging_config.go @@ -11,6 +11,7 @@ type CatalogingFactoryConfig struct { SearchConfig cataloging.SearchConfig RelationshipsConfig cataloging.RelationshipsConfig DataGenerationConfig cataloging.DataGenerationConfig + LicenseConfig cataloging.LicenseConfig PackagesConfig pkgcataloging.Config FilesConfig filecataloging.Config } @@ -21,6 +22,7 @@ func DefaultCatalogingFactoryConfig() CatalogingFactoryConfig { SearchConfig: cataloging.DefaultSearchConfig(), RelationshipsConfig: cataloging.DefaultRelationshipsConfig(), DataGenerationConfig: cataloging.DefaultDataGenerationConfig(), + LicenseConfig: cataloging.DefaultLicenseConfig(), 
PackagesConfig: pkgcataloging.DefaultConfig(), FilesConfig: filecataloging.DefaultConfig(), } diff --git a/syft/cataloging/license.go b/syft/cataloging/license.go new file mode 100644 index 000000000..24d8686f5 --- /dev/null +++ b/syft/cataloging/license.go @@ -0,0 +1,15 @@ +package cataloging + +import "github.com/anchore/syft/internal/licenses" + +type LicenseConfig struct { + IncludeUnkownLicenseContent bool `json:"include-unknown-license-content" yaml:"include-unknown-license-content" mapstructure:"include-unknown-license-content"` + Coverage float64 `json:"coverage" yaml:"coverage" mapstructure:"coverage"` +} + +func DefaultLicenseConfig() LicenseConfig { + return LicenseConfig{ + IncludeUnkownLicenseContent: licenses.DefaultIncludeLicenseContent, + Coverage: licenses.DefaultCoverageThreshold, + } +} diff --git a/syft/configuration_audit_trail.go b/syft/configuration_audit_trail.go index 7346cefd3..ece571581 100644 --- a/syft/configuration_audit_trail.go +++ b/syft/configuration_audit_trail.go @@ -16,6 +16,7 @@ type configurationAuditTrail struct { DataGeneration cataloging.DataGenerationConfig `json:"data-generation" yaml:"data-generation" mapstructure:"data-generation"` Packages pkgcataloging.Config `json:"packages" yaml:"packages" mapstructure:"packages"` Files filecataloging.Config `json:"files" yaml:"files" mapstructure:"files"` + Licenses cataloging.LicenseConfig `json:"licenses" yaml:"licenses" mapstructure:"licenses"` Catalogers catalogerManifest `json:"catalogers" yaml:"catalogers" mapstructure:"catalogers"` ExtraConfigs any `json:"extra,omitempty" yaml:"extra" mapstructure:"extra"` } diff --git a/syft/create_sbom.go b/syft/create_sbom.go index de732c5ad..1d578191a 100644 --- a/syft/create_sbom.go +++ b/syft/create_sbom.go @@ -52,6 +52,7 @@ func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) ( DataGeneration: cfg.DataGeneration, Packages: cfg.Packages, Files: cfg.Files, + Licenses: cfg.Licenses, Catalogers: *audit, 
ExtraConfigs: cfg.ToolConfiguration, }, @@ -61,8 +62,15 @@ func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) ( }, } - // inject a single license scanner for all package cataloging tasks into context - ctx = licenses.SetContextLicenseScanner(ctx, licenses.NewDefaultScanner()) + // inject a single license scanner and content config for all package cataloging tasks into context + licenseScanner, err := licenses.NewDefaultScanner( + licenses.WithIncludeLicenseContent(cfg.Licenses.IncludeUnkownLicenseContent), + licenses.WithCoverage(cfg.Licenses.Coverage), + ) + if err != nil { + return nil, fmt.Errorf("could not build licenseScanner for cataloging: %w", err) + } + ctx = licenses.SetContextLicenseScanner(ctx, licenseScanner) catalogingProgress := monitorCatalogingTask(src.ID(), taskGroups) packageCatalogingProgress := monitorPackageCatalogingTask() diff --git a/syft/create_sbom_config.go b/syft/create_sbom_config.go index 09859211a..1d4d2eadf 100644 --- a/syft/create_sbom_config.go +++ b/syft/create_sbom_config.go @@ -26,6 +26,7 @@ type CreateSBOMConfig struct { Unknowns cataloging.UnknownsConfig DataGeneration cataloging.DataGenerationConfig Packages pkgcataloging.Config + Licenses cataloging.LicenseConfig Files filecataloging.Config Parallelism int CatalogerSelection cataloging.SelectionRequest @@ -46,6 +47,7 @@ func DefaultCreateSBOMConfig() *CreateSBOMConfig { Relationships: cataloging.DefaultRelationshipsConfig(), DataGeneration: cataloging.DefaultDataGenerationConfig(), Packages: pkgcataloging.DefaultConfig(), + Licenses: cataloging.DefaultLicenseConfig(), Files: filecataloging.DefaultConfig(), Parallelism: 1, packageTaskFactories: task.DefaultPackageTaskFactories(), @@ -134,6 +136,12 @@ func (c *CreateSBOMConfig) WithPackagesConfig(cfg pkgcataloging.Config) *CreateS return c } +// WithLicenseConfig allows for defining license scanning behavior (the coverage threshold and whether unknown license content is included).
+func (c *CreateSBOMConfig) WithLicenseConfig(cfg cataloging.LicenseConfig) *CreateSBOMConfig { + c.Licenses = cfg + return c +} + // WithFilesConfig allows for defining file-based cataloging parameters. func (c *CreateSBOMConfig) WithFilesConfig(cfg filecataloging.Config) *CreateSBOMConfig { c.Files = cfg diff --git a/syft/file/license.go b/syft/file/license.go index 08d77e052..ea0dc6e9a 100644 --- a/syft/file/license.go +++ b/syft/file/license.go @@ -10,6 +10,7 @@ type License struct { SPDXExpression string Type license.Type LicenseEvidence *LicenseEvidence // evidence from license classifier + Contents string `hash:"ignore"` } type LicenseEvidence struct { diff --git a/syft/pkg/cataloger/golang/licenses.go b/syft/pkg/cataloger/golang/licenses.go index 857d04866..7e8b38b98 100644 --- a/syft/pkg/cataloger/golang/licenses.go +++ b/syft/pkg/cataloger/golang/licenses.go @@ -214,7 +214,7 @@ func (c *goLicenseResolver) findLicensesInFS(ctx context.Context, scanner licens } defer internal.CloseAndLogError(rdr, filePath) - parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(file.NewLocation(filePath), rdr)) + parsed, err := scanner.PkgSearch(ctx, file.NewLocationReadCloser(file.NewLocation(filePath), rdr)) if err != nil { log.Debugf("error parsing license file %s: %v", filePath, err) return nil @@ -267,7 +267,7 @@ func (c *goLicenseResolver) parseLicenseFromLocation(ctx context.Context, scanne return nil, err } defer internal.CloseAndLogError(contents, l.RealPath) - parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(l, contents)) + parsed, err := scanner.PkgSearch(ctx, file.NewLocationReadCloser(l, contents)) if err != nil { return nil, err } diff --git a/syft/pkg/cataloger/golang/licenses_test.go b/syft/pkg/cataloger/golang/licenses_test.go index c3e783d4f..13a73ebee 100644 --- a/syft/pkg/cataloger/golang/licenses_test.go +++ b/syft/pkg/cataloger/golang/licenses_test.go @@ -71,7 +71,12 @@ func Test_LicenseSearch(t *testing.T) { 
localVendorDir := filepath.Join(wd, "test-fixtures", "licenses-vendor") - licenseScanner := licenses.NewScanner(licensecheck.Scan, float64(75)) + sc := &licenses.ScannerConfig{ + CoverageThreshold: 75, + Scanner: licensecheck.Scan, + } + licenseScanner, err := licenses.NewScanner(sc) + require.NoError(t, err) tests := []struct { name string @@ -296,7 +301,10 @@ func Test_findVersionPath(t *testing.T) { func Test_walkDirErrors(t *testing.T) { resolver := newGoLicenseResolver("", CatalogerConfig{}) - _, err := resolver.findLicensesInFS(context.Background(), licenses.NewScanner(licensecheck.Scan, float64(75)), "somewhere", badFS{}) + sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75} + scanner, err := licenses.NewScanner(sc) + require.NoError(t, err) + _, err = resolver.findLicensesInFS(context.Background(), scanner, "somewhere", badFS{}) require.Error(t, err) } @@ -314,7 +322,9 @@ func Test_noLocalGoModDir(t *testing.T) { validTmp := t.TempDir() require.NoError(t, os.MkdirAll(filepath.Join(validTmp, "mod@ver"), 0700|os.ModeDir)) - licenseScanner := licenses.NewScanner(licensecheck.Scan, float64(75)) + sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75} + licenseScanner, err := licenses.NewScanner(sc) + require.NoError(t, err) tests := []struct { name string dir string diff --git a/syft/pkg/cataloger/golang/parse_go_binary.go b/syft/pkg/cataloger/golang/parse_go_binary.go index 1671886b6..83f679467 100644 --- a/syft/pkg/cataloger/golang/parse_go_binary.go +++ b/syft/pkg/cataloger/golang/parse_go_binary.go @@ -63,7 +63,10 @@ func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger { func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { var pkgs []pkg.Package - licenseScanner := licenses.ContextLicenseScanner(ctx) + licenseScanner, err := 
licenses.ContextLicenseScanner(ctx) + if err != nil { + return nil, nil, err + } unionReader, err := unionreader.GetUnionReader(reader.ReadCloser) if err != nil { diff --git a/syft/pkg/cataloger/golang/parse_go_binary_test.go b/syft/pkg/cataloger/golang/parse_go_binary_test.go index 62d804d7f..12d12cd8c 100644 --- a/syft/pkg/cataloger/golang/parse_go_binary_test.go +++ b/syft/pkg/cataloger/golang/parse_go_binary_test.go @@ -170,7 +170,9 @@ func TestBuildGoPkgInfo(t *testing.T) { }, } - licenseScanner := licenses.NewScanner(licensecheck.Scan, float64(75)) + sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75} + licenseScanner, err := licenses.NewScanner(sc) + require.NoError(t, err) tests := []struct { name string diff --git a/syft/pkg/cataloger/golang/parse_go_mod.go b/syft/pkg/cataloger/golang/parse_go_mod.go index 4a3f11236..227f4c5e4 100644 --- a/syft/pkg/cataloger/golang/parse_go_mod.go +++ b/syft/pkg/cataloger/golang/parse_go_mod.go @@ -35,7 +35,10 @@ func newGoModCataloger(opts CatalogerConfig) *goModCataloger { func (c *goModCataloger) parseGoModFile(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { packages := make(map[string]pkg.Package) - licenseScanner := licenses.ContextLicenseScanner(ctx) + licenseScanner, err := licenses.ContextLicenseScanner(ctx) + if err != nil { + return nil, nil, fmt.Errorf("unable to create default license scanner: %w", err) + } contents, err := io.ReadAll(reader) if err != nil { diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index e0ed965b2..b4cf8b0c1 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -96,7 +96,10 @@ func uniquePkgKey(groupID string, p *pkg.Package) string { // newJavaArchiveParser returns a new java archive parser object for the given archive. 
Can be configured to discover // and parse nested archives or ignore them. func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) { - licenseScanner := licenses.ContextLicenseScanner(ctx) + licenseScanner, err := licenses.ContextLicenseScanner(ctx) + if err != nil { + return nil, nil, fmt.Errorf("could not build license scanner for java archive parser: %w", err) + } // fetch the last element of the virtual path virtualElements := strings.Split(reader.Path(), ":") @@ -506,7 +509,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg. for _, licenseMatch := range licenseMatches { licenseContents := contents[licenseMatch] r := strings.NewReader(licenseContents) - parsed, err := licenses.Search(ctx, j.licenseScanner, file.NewLocationReadCloser(j.location, io.NopCloser(r))) + parsed, err := j.licenseScanner.PkgSearch(ctx, file.NewLocationReadCloser(j.location, io.NopCloser(r))) if err != nil { return nil, err } diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index 2b389c441..4fb439bce 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -32,8 +32,10 @@ import ( func TestSearchMavenForLicenses(t *testing.T) { url := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo") - - ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.NewScanner(licensecheck.Scan, float64(75))) + sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75} + scanner, err := licenses.NewScanner(sc) + require.NoError(t, err) + ctx := licenses.SetContextLicenseScanner(context.Background(), scanner) tests := []struct { name string @@ -92,7 +94,10 @@ func TestSearchMavenForLicenses(t *testing.T) { } func TestParseJar(t *testing.T) { - ctx := licenses.SetContextLicenseScanner(context.Background(), 
licenses.NewScanner(licensecheck.Scan, float64(75))) + sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75} + scanner, err := licenses.NewScanner(sc) + require.NoError(t, err) + ctx := licenses.SetContextLicenseScanner(context.Background(), scanner) tests := []struct { name string @@ -1375,7 +1380,10 @@ func Test_parseJavaArchive_regressions(t *testing.T) { } func Test_deterministicMatchingPomProperties(t *testing.T) { - ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.NewScanner(licensecheck.Scan, float64(75))) + sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75} + scanner, err := licenses.NewScanner(sc) + require.NoError(t, err) + ctx := licenses.SetContextLicenseScanner(context.Background(), scanner) tests := []struct { fixture string diff --git a/syft/pkg/cataloger/python/package.go b/syft/pkg/cataloger/python/package.go index e7c861256..323549bb9 100644 --- a/syft/pkg/cataloger/python/package.go +++ b/syft/pkg/cataloger/python/package.go @@ -109,7 +109,7 @@ func findLicenses(ctx context.Context, scanner licenses.Scanner, resolver file.R if len(found) > 0 { metadataContents, err := resolver.FileContentsByLocation(found[0]) if err == nil { - parsed, err := licenses.Search(ctx, scanner, file.NewLocationReadCloser(m.LicenseLocation, metadataContents)) + parsed, err := scanner.PkgSearch(ctx, file.NewLocationReadCloser(m.LicenseLocation, metadataContents)) if err != nil { log.WithFields("error", err).Tracef("unable to parse a license from the file in %s", m.LicenseLocation.Path()) } diff --git a/syft/pkg/cataloger/python/parse_wheel_egg.go b/syft/pkg/cataloger/python/parse_wheel_egg.go index 2d2b9487c..a593778eb 100644 --- a/syft/pkg/cataloger/python/parse_wheel_egg.go +++ b/syft/pkg/cataloger/python/parse_wheel_egg.go @@ -20,8 +20,10 @@ import ( // parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents. 
Contained // fields are governed by the PyPA core metadata specification (https://packaging.python.org/en/latest/specifications/core-metadata/). func parseWheelOrEgg(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - licenseScanner := licenses.ContextLicenseScanner(ctx) - + licenseScanner, err := licenses.ContextLicenseScanner(ctx) + if err != nil { + return nil, nil, err + } pd, sources, err := assembleEggOrWheelMetadata(resolver, reader.Location) if err != nil { return nil, nil, err diff --git a/test/cli/license_test.go b/test/cli/license_test.go new file mode 100644 index 000000000..260e8fdeb --- /dev/null +++ b/test/cli/license_test.go @@ -0,0 +1,45 @@ +package cli + +import "testing" + +func Test_Licenses(t *testing.T) { + testImage := getFixtureImage(t, "image-unknown-licenses") + tests := []struct { + name string + args []string + env map[string]string + assertions []traitAssertion + }{ + { + name: "licenses default with no content", + args: []string{"scan", "-o", "json", testImage, "--from", "docker-archive"}, + env: map[string]string{"SYFT_FORMAT_PRETTY": "true"}, + assertions: []traitAssertion{ + assertJsonReport, + assertUnknownLicenseContent(false), + assertSuccessfulReturnCode, + }, + }, + + { + name: "licenses with content", + args: []string{"scan", "-o", "json", testImage, "--from", "docker-archive"}, + env: map[string]string{"SYFT_FORMAT_PRETTY": "true", "SYFT_LICENSE_INCLUDE_UNKNOWN_LICENSE_CONTENT": "true"}, + assertions: []traitAssertion{ + assertJsonReport, + assertUnknownLicenseContent(true), + assertSuccessfulReturnCode, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + cmd, stdout, stderr := runSyft(t, test.env, test.args...) 
+ for _, traitFn := range test.assertions { + traitFn(t, stdout, stderr, cmd.ProcessState.ExitCode()) + } + logOutputOnFailure(t, cmd, stdout, stderr) + }) + } +} diff --git a/test/cli/test-fixtures/image-unknown-licenses/Dockerfile b/test/cli/test-fixtures/image-unknown-licenses/Dockerfile new file mode 100644 index 000000000..503f6808d --- /dev/null +++ b/test/cli/test-fixtures/image-unknown-licenses/Dockerfile @@ -0,0 +1,3 @@ +FROM alpine@sha256:c5c5fda71656f28e49ac9c5416b3643eaa6a108a8093151d6d1afc9463be8e33 +RUN rm -rf /lib/apk/db/installed +COPY . /home/files diff --git a/test/cli/trait_assertions_test.go b/test/cli/trait_assertions_test.go index ca01c9cfd..389dc89fe 100644 --- a/test/cli/trait_assertions_test.go +++ b/test/cli/trait_assertions_test.go @@ -10,7 +10,10 @@ import ( "testing" "github.com/acarl005/stripansi" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/format/syftjson/model" ) type traitAssertion func(tb testing.TB, stdout, stderr string, rc int) @@ -148,6 +151,34 @@ func assertPackageCount(length uint) traitAssertion { } } +func assertUnknownLicenseContent(required bool) traitAssertion { + return func(tb testing.TB, stdout, _ string, _ int) { + tb.Helper() + type NameAndLicense struct { + Name string `json:"name"` + Licenses []model.License `json:"Licenses"` + } + type partial struct { + Artifacts []NameAndLicense `json:"artifacts"` + } + + var data partial + if err := json.Unmarshal([]byte(stdout), &data); err != nil { + tb.Errorf("expected to find a JSON report, but was unmarshalable: %+v", err) + } + + for _, pkg := range data.Artifacts { + for _, lic := range pkg.Licenses { + if strings.Contains(lic.SPDXExpression, "UNKNOWN") && required { + assert.NotZero(tb, len(lic.Contents)) + } else { + assert.Empty(tb, lic.Contents) + } + } + } + } +} + func assertFailingReturnCode(tb testing.TB, _, _ string, rc int) { tb.Helper() if rc == 0 {