feat: improve dpkg cataloger license recognition for "license agreements" (#3888)

This commit is contained in:
Christopher Angelo Phillips 2025-05-14 08:41:48 -04:00 committed by GitHub
parent 175a6719a9
commit e5d7760bb8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 207 additions and 13 deletions

View File

@ -118,17 +118,24 @@ func addLicenses(ctx context.Context, resolver file.Resolver, dbLocation file.Lo
// get license information from the copyright file
copyrightReader, copyrightLocation := fetchCopyrightContents(resolver, dbLocation, metadata)
var licenseStrs []string
if copyrightReader != nil && copyrightLocation != nil {
defer internal.CloseAndLogError(copyrightReader, copyrightLocation.AccessPath)
// attach the licenses
licenseStrs := parseLicensesFromCopyright(copyrightReader)
licenseStrs = parseLicensesFromCopyright(copyrightReader)
for _, licenseStr := range licenseStrs {
p.Licenses.Add(pkg.NewLicenseFromLocationsWithContext(ctx, licenseStr, copyrightLocation.WithoutAnnotations()))
}
// keep a record of the file where this was discovered
p.Locations.Add(*copyrightLocation)
}
// try to use the license classifier if parsing the copyright file failed
if len(licenseStrs) == 0 {
sr, sl := fetchCopyrightContents(resolver, dbLocation, metadata)
if sr != nil && sl != nil {
p.Licenses.Add(pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(*sl, sr))...)
}
}
}
func mergeFileListing(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {

View File

@ -1,7 +1,6 @@
package debian
import (
"bufio"
"io"
"regexp"
"sort"
@ -15,32 +14,49 @@ import (
// For more information see: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-syntax
var (
licensePattern = regexp.MustCompile(`^License: (?P<license>\S*)`)
commonLicensePathPattern = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
licensePattern = regexp.MustCompile(`^License: (?P<license>\S*)`)
commonLicensePathPattern = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
licenseFirstSentenceAfterHeadingPattern = regexp.MustCompile(`(?is)^[^\n]+?\n[-]+?\n+(?P<license>.*?\.)`)
licenseAgreementHeadingPattern = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`)
)
func parseLicensesFromCopyright(reader io.Reader) []string {
findings := strset.New()
scanner := bufio.NewScanner(reader)
data, err := io.ReadAll(reader)
if err != nil {
// Fail-safe: return nothing if unable to read
return []string{}
}
for scanner.Scan() {
line := scanner.Text()
if value := findLicenseClause(licensePattern, "license", line); value != "" {
content := string(data)
lines := strings.Split(content, "\n")
for _, line := range lines {
if value := findLicenseClause(licensePattern, line); value != "" {
findings.Add(value)
}
if value := findLicenseClause(commonLicensePathPattern, "license", line); value != "" {
if value := findLicenseClause(commonLicensePathPattern, line); value != "" {
findings.Add(value)
}
if value := findLicenseClause(licenseAgreementHeadingPattern, line); value != "" {
findings.Add(value)
}
}
results := findings.List()
// some copyright files have a license declaration after the heading ex:
// End User License Agreement\n--------------------------
// we want to try and find these multi-line license declarations and make exceptions for them
if value := findLicenseClause(licenseFirstSentenceAfterHeadingPattern, content); value != "" {
findings.Add(value)
}
results := findings.List()
sort.Strings(results)
return results
}
func findLicenseClause(pattern *regexp.Regexp, valueGroup, line string) string {
func findLicenseClause(pattern *regexp.Regexp, line string) string {
valueGroup := "license"
matchesByGroup := internal.MatchNamedCaptureGroups(pattern, line)
candidate, ok := matchesByGroup[valueGroup]
@ -51,9 +67,21 @@ func findLicenseClause(pattern *regexp.Regexp, valueGroup, line string) string {
return ensureIsSingleLicense(candidate)
}
// multiLicenseExceptions lists license names that legitimately contain the
// words " and " or " or " yet still denote a single license.
//
// ensureIsSingleLicense checks a candidate against this list (substring match
// via strings.Contains) before applying its multi-license filter; a candidate
// containing one of these entries is returned as-is, minus a trailing period,
// instead of being discarded as a multi-license summary.
var multiLicenseExceptions = []string{
	// e.g. "NVIDIA Software License Agreement and CUDA Supplement to Software License Agreement"
	"NVIDIA Software License Agreement",
}
func ensureIsSingleLicense(candidate string) (license string) {
candidate = strings.TrimSpace(candidate)
candidate = strings.TrimSpace(strings.ReplaceAll(candidate, "\n", " "))
// Check for exceptions first
for _, exception := range multiLicenseExceptions {
if strings.Contains(candidate, exception) {
return strings.TrimSuffix(candidate, ".")
}
}
if strings.Contains(candidate, " or ") || strings.Contains(candidate, " and ") {
// make sure this is not one of the license exceptions
// this is a multi-license summary, ignore this as other recurrent license lines should cover this
return
}

View File

@ -35,6 +35,18 @@ func TestParseLicensesFromCopyright(t *testing.T) {
// note: this should not capture #, Permission, This, see ... however it's not clear how to fix this (this is probably good enough)
expected: []string{"#", "Apache", "Apache-2", "Apache-2.0", "Expat", "GPL-2", "ISC", "LGPL-2.1+", "PSF-2", "Permission", "Python", "This", "see"},
},
{
fixture: "test-fixtures/copyright/cuda",
expected: []string{"NVIDIA Software License Agreement and CUDA Supplement to Software License Agreement"},
},
{
fixture: "test-fixtures/copyright/dev-kit",
expected: []string{"LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS"},
},
{
fixture: "test-fixtures/copyright/microsoft",
expected: []string{"LICENSE AGREEMENT FOR MICROSOFT PRODUCTS"},
},
}
for _, test := range tests {

View File

@ -0,0 +1,145 @@
End User License Agreement
--------------------------
NVIDIA Software License Agreement and CUDA Supplement to
Software License Agreement.
The CUDA Toolkit End User License Agreement applies to the
NVIDIA CUDA Toolkit, the NVIDIA CUDA Samples, the NVIDIA
Display Driver, NVIDIA Nsight tools (Visual Studio Edition),
and the associated documentation on CUDA APIs, programming
model and development tools. If you do not agree with the
terms and conditions of the license agreement, then do not
download or use the software.
Last updated: January 12, 2024.
Preface
-------
The Software License Agreement in Chapter 1 and the Supplement
in Chapter 2 contain license terms and conditions that govern
the use of NVIDIA toolkit. By accepting this agreement, you
agree to comply with all the terms and conditions applicable
to the product(s) included herein.
NVIDIA Driver
Description
This package contains the operating system driver and
fundamental system software components for NVIDIA GPUs.
NVIDIA CUDA Toolkit
Description
The NVIDIA CUDA Toolkit provides command-line and graphical
tools for building, debugging and optimizing the performance
of applications accelerated by NVIDIA GPUs, runtime and math
libraries, and documentation including programming guides,
user manuals, and API references.
Default Install Location of CUDA Toolkit
Windows platform:
%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
Linux platform:
/usr/local/cuda-#.#
Mac platform:
/Developer/NVIDIA/CUDA-#.#
NVIDIA CUDA Samples
Description
CUDA Samples are now located in
https://github.com/nvidia/cuda-samples, which includes
instructions for obtaining, building, and running the samples.
They are no longer included in the CUDA toolkit.
NVIDIA Nsight Visual Studio Edition (Windows only)
Description
NVIDIA Nsight Development Platform, Visual Studio Edition is a
development environment integrated into Microsoft Visual
Studio that provides tools for debugging, profiling, analyzing
and optimizing your GPU computing and graphics applications.
Default Install Location of Nsight Visual Studio Edition
Windows platform:
%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
1. License Agreement for NVIDIA Software Development Kits
---------------------------------------------------------
Important Notice—Read before downloading, installing,
copying or using the licensed software:
-------------------------------------------------------
This license agreement, including exhibits attached
(“Agreement”) is a legal agreement between you and NVIDIA
Corporation ("NVIDIA") and governs your use of a NVIDIA
software development kit (“SDK”).
Each SDK has its own set of software and materials, but here
is a description of the types of items that may be included in
a SDK: source code, header files, APIs, data sets and assets
(examples include images, textures, models, scenes, videos,
native API input/output files), binary software, sample code,
libraries, utility programs, programming code and
documentation.
This Agreement can be accepted only by an adult of legal age
of majority in the country in which the SDK is used.
If you are entering into this Agreement on behalf of a company
or other legal entity, you represent that you have the legal
authority to bind the entity to this Agreement, in which case
“you” will mean the entity you represent.
If you don’t have the required age or authority to accept
this Agreement, or if you don’t accept all the terms and
conditions of this Agreement, do not download, install or use
the SDK.
You agree to use the SDK only for purposes that are permitted
by (a) this Agreement, and (b) any applicable law, regulation
or generally accepted practices or guidelines in the relevant
jurisdictions.
1.1. License
1.1.1. License Grant
Subject to the terms of this Agreement, NVIDIA hereby grants
you a non-exclusive, non-transferable license, without the
right to sublicense (except as expressly provided in this
Agreement) to:
1. Install and use the SDK,
2. Modify and create derivativ

View File

@ -0,0 +1 @@
LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS

View File

@ -0,0 +1 @@
LICENSE AGREEMENT FOR MICROSOFT PRODUCTS