From e5d7760bb8c4b0738611371da110cfab6d605daf Mon Sep 17 00:00:00 2001 From: Christopher Angelo Phillips <32073428+spiffcs@users.noreply.github.com> Date: Wed, 14 May 2025 08:41:48 -0400 Subject: [PATCH] feat: improve dpkg cataloger license recognition for "license agreements" (#3888) --- syft/pkg/cataloger/debian/package.go | 11 +- syft/pkg/cataloger/debian/parse_copyright.go | 50 ++++-- .../cataloger/debian/parse_copyright_test.go | 12 ++ .../debian/test-fixtures/copyright/cuda | 145 ++++++++++++++++++ .../debian/test-fixtures/copyright/dev-kit | 1 + .../debian/test-fixtures/copyright/microsoft | 1 + 6 files changed, 207 insertions(+), 13 deletions(-) create mode 100644 syft/pkg/cataloger/debian/test-fixtures/copyright/cuda create mode 100644 syft/pkg/cataloger/debian/test-fixtures/copyright/dev-kit create mode 100644 syft/pkg/cataloger/debian/test-fixtures/copyright/microsoft diff --git a/syft/pkg/cataloger/debian/package.go b/syft/pkg/cataloger/debian/package.go index b10084a59..5dd3c8c17 100644 --- a/syft/pkg/cataloger/debian/package.go +++ b/syft/pkg/cataloger/debian/package.go @@ -118,17 +118,24 @@ func addLicenses(ctx context.Context, resolver file.Resolver, dbLocation file.Lo // get license information from the copyright file copyrightReader, copyrightLocation := fetchCopyrightContents(resolver, dbLocation, metadata) - + var licenseStrs []string if copyrightReader != nil && copyrightLocation != nil { defer internal.CloseAndLogError(copyrightReader, copyrightLocation.AccessPath) // attach the licenses - licenseStrs := parseLicensesFromCopyright(copyrightReader) + licenseStrs = parseLicensesFromCopyright(copyrightReader) for _, licenseStr := range licenseStrs { p.Licenses.Add(pkg.NewLicenseFromLocationsWithContext(ctx, licenseStr, copyrightLocation.WithoutAnnotations())) } // keep a record of the file where this was discovered p.Locations.Add(*copyrightLocation) } + // try to use the license classifier if parsing the copyright file failed + if 
len(licenseStrs) == 0 { + sr, sl := fetchCopyrightContents(resolver, dbLocation, metadata) + if sr != nil && sl != nil { + p.Licenses.Add(pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(*sl, sr))...) + } + } } func mergeFileListing(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) { diff --git a/syft/pkg/cataloger/debian/parse_copyright.go b/syft/pkg/cataloger/debian/parse_copyright.go index 3f02131b7..7b7985710 100644 --- a/syft/pkg/cataloger/debian/parse_copyright.go +++ b/syft/pkg/cataloger/debian/parse_copyright.go @@ -1,7 +1,6 @@ package debian import ( - "bufio" "io" "regexp" "sort" @@ -15,32 +14,49 @@ import ( // For more information see: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-syntax var ( - licensePattern = regexp.MustCompile(`^License: (?P<license>\S*)`) - commonLicensePathPattern = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`) + licensePattern = regexp.MustCompile(`^License: (?P<license>\S*)`) + commonLicensePathPattern = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`) + licenseFirstSentenceAfterHeadingPattern = regexp.MustCompile(`(?is)^[^\n]+?\n[-]+?\n+(?P<license>.*?\.)`) + licenseAgreementHeadingPattern = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`) ) func parseLicensesFromCopyright(reader io.Reader) []string { findings := strset.New() - scanner := bufio.NewScanner(reader) + data, err := io.ReadAll(reader) + if err != nil { + // Fail-safe: return nothing if unable to read + return []string{} + } - for scanner.Scan() { - line := scanner.Text() - if value := findLicenseClause(licensePattern, "license", line); value != "" { + content := string(data) + lines := strings.Split(content, "\n") + for _, line := range lines { + if value := findLicenseClause(licensePattern, line); value != "" { findings.Add(value) } - if value := findLicenseClause(commonLicensePathPattern, "license", line); value != "" { + if value := 
findLicenseClause(commonLicensePathPattern, line); value != "" { + findings.Add(value) + } + if value := findLicenseClause(licenseAgreementHeadingPattern, line); value != "" { findings.Add(value) } } - results := findings.List() + // some copyright files have a license declaration after the heading ex: + // End User License Agreement\n-------------------------- + // we want to try and find these multi-line license declarations and make exceptions for them + if value := findLicenseClause(licenseFirstSentenceAfterHeadingPattern, content); value != "" { + findings.Add(value) + } + results := findings.List() sort.Strings(results) return results } -func findLicenseClause(pattern *regexp.Regexp, valueGroup, line string) string { +func findLicenseClause(pattern *regexp.Regexp, line string) string { + valueGroup := "license" matchesByGroup := internal.MatchNamedCaptureGroups(pattern, line) candidate, ok := matchesByGroup[valueGroup] @@ -51,9 +67,21 @@ func findLicenseClause(pattern *regexp.Regexp, valueGroup, line string) string { return ensureIsSingleLicense(candidate) } +var multiLicenseExceptions = []string{ + "NVIDIA Software License Agreement", +} + func ensureIsSingleLicense(candidate string) (license string) { - candidate = strings.TrimSpace(candidate) + candidate = strings.TrimSpace(strings.ReplaceAll(candidate, "\n", " ")) + + // Check for exceptions first + for _, exception := range multiLicenseExceptions { + if strings.Contains(candidate, exception) { + return strings.TrimSuffix(candidate, ".") + } + } if strings.Contains(candidate, " or ") || strings.Contains(candidate, " and ") { + // make sure this is not one of the license exceptions // this is a multi-license summary, ignore this as other recurrent license lines should cover this return } diff --git a/syft/pkg/cataloger/debian/parse_copyright_test.go b/syft/pkg/cataloger/debian/parse_copyright_test.go index 711990aa9..e16929496 100644 --- a/syft/pkg/cataloger/debian/parse_copyright_test.go +++ 
b/syft/pkg/cataloger/debian/parse_copyright_test.go @@ -35,6 +35,18 @@ func TestParseLicensesFromCopyright(t *testing.T) { // note: this should not capture #, Permission, This, see ... however it's not clear how to fix this (this is probably good enough) expected: []string{"#", "Apache", "Apache-2", "Apache-2.0", "Expat", "GPL-2", "ISC", "LGPL-2.1+", "PSF-2", "Permission", "Python", "This", "see"}, }, + { + fixture: "test-fixtures/copyright/cuda", + expected: []string{"NVIDIA Software License Agreement and CUDA Supplement to Software License Agreement"}, + }, + { + fixture: "test-fixtures/copyright/dev-kit", + expected: []string{"LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS"}, + }, + { + fixture: "test-fixtures/copyright/microsoft", + expected: []string{"LICENSE AGREEMENT FOR MICROSOFT PRODUCTS"}, + }, } for _, test := range tests { diff --git a/syft/pkg/cataloger/debian/test-fixtures/copyright/cuda b/syft/pkg/cataloger/debian/test-fixtures/copyright/cuda new file mode 100644 index 000000000..c015f5e13 --- /dev/null +++ b/syft/pkg/cataloger/debian/test-fixtures/copyright/cuda @@ -0,0 +1,145 @@ +End User License Agreement +-------------------------- + +NVIDIA Software License Agreement and CUDA Supplement to +Software License Agreement. + +The CUDA Toolkit End User License Agreement applies to the +NVIDIA CUDA Toolkit, the NVIDIA CUDA Samples, the NVIDIA +Display Driver, NVIDIA Nsight tools (Visual Studio Edition), +and the associated documentation on CUDA APIs, programming +model and development tools. If you do not agree with the +terms and conditions of the license agreement, then do not +download or use the software. + +Last updated: January 12, 2024. + + +Preface +------- + +The Software License Agreement in Chapter 1 and the Supplement +in Chapter 2 contain license terms and conditions that govern +the use of NVIDIA toolkit. 
By accepting this agreement, you +agree to comply with all the terms and conditions applicable +to the product(s) included herein. + + +NVIDIA Driver + + +Description + +This package contains the operating system driver and +fundamental system software components for NVIDIA GPUs. + + +NVIDIA CUDA Toolkit + + +Description + +The NVIDIA CUDA Toolkit provides command-line and graphical +tools for building, debugging and optimizing the performance +of applications accelerated by NVIDIA GPUs, runtime and math +libraries, and documentation including programming guides, +user manuals, and API references. + + +Default Install Location of CUDA Toolkit + +Windows platform: + +%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.# + +Linux platform: + +/usr/local/cuda-#.# + +Mac platform: + +/Developer/NVIDIA/CUDA-#.# + + +NVIDIA CUDA Samples + + +Description + +CUDA Samples are now located in +https://github.com/nvidia/cuda-samples, which includes +instructions for obtaining, building, and running the samples. +They are no longer included in the CUDA toolkit. + + +NVIDIA Nsight Visual Studio Edition (Windows only) + + +Description + +NVIDIA Nsight Development Platform, Visual Studio Edition is a +development environment integrated into Microsoft Visual +Studio that provides tools for debugging, profiling, analyzing +and optimizing your GPU computing and graphics applications. + + +Default Install Location of Nsight Visual Studio Edition + +Windows platform: + +%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.# + + +1. 
License Agreement for NVIDIA Software Development Kits +--------------------------------------------------------- + + +Important Notice—Read before downloading, installing, +copying or using the licensed software: +------------------------------------------------------- + +This license agreement, including exhibits attached +("Agreement”) is a legal agreement between you and NVIDIA +Corporation ("NVIDIA") and governs your use of a NVIDIA +software development kit (“SDK”). + +Each SDK has its own set of software and materials, but here +is a description of the types of items that may be included in +a SDK: source code, header files, APIs, data sets and assets +(examples include images, textures, models, scenes, videos, +native API input/output files), binary software, sample code, +libraries, utility programs, programming code and +documentation. + +This Agreement can be accepted only by an adult of legal age +of majority in the country in which the SDK is used. + +If you are entering into this Agreement on behalf of a company +or other legal entity, you represent that you have the legal +authority to bind the entity to this Agreement, in which case +“you” will mean the entity you represent. + +If you don’t have the required age or authority to accept +this Agreement, or if you don’t accept all the terms and +conditions of this Agreement, do not download, install or use +the SDK. + +You agree to use the SDK only for purposes that are permitted +by (a) this Agreement, and (b) any applicable law, regulation +or generally accepted practices or guidelines in the relevant +jurisdictions. + + +1.1. License + + +1.1.1. License Grant + +Subject to the terms of this Agreement, NVIDIA hereby grants +you a non-exclusive, non-transferable license, without the +right to sublicense (except as expressly provided in this +Agreement) to: + + 1. Install and use the SDK, + + 2. 
Modify and create derivativ \ No newline at end of file diff --git a/syft/pkg/cataloger/debian/test-fixtures/copyright/dev-kit b/syft/pkg/cataloger/debian/test-fixtures/copyright/dev-kit new file mode 100644 index 000000000..67125d762 --- /dev/null +++ b/syft/pkg/cataloger/debian/test-fixtures/copyright/dev-kit @@ -0,0 +1 @@ +LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS \ No newline at end of file diff --git a/syft/pkg/cataloger/debian/test-fixtures/copyright/microsoft b/syft/pkg/cataloger/debian/test-fixtures/copyright/microsoft new file mode 100644 index 000000000..ef3810788 --- /dev/null +++ b/syft/pkg/cataloger/debian/test-fixtures/copyright/microsoft @@ -0,0 +1 @@ +LICENSE AGREEMENT FOR MICROSOFT PRODUCTS \ No newline at end of file