feat: Perform case-insensitive matching on Java license files (#2235)

Signed-off-by: Colm O hEigeartaigh <coheigea@apache.org>
This commit is contained in:
Colm O hEigeartaigh 2023-10-25 14:51:59 +01:00 committed by GitHub
parent 7392d607b6
commit 1daf18fee9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 13 deletions

View File

@ -40,15 +40,18 @@ func (z ZipFileManifest) Add(entry string, info os.FileInfo) {
} }
// GlobMatch returns the path keys that match the given value(s). // GlobMatch returns the path keys that match the given value(s).
func (z ZipFileManifest) GlobMatch(patterns ...string) []string { func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []string {
uniqueMatches := strset.New() uniqueMatches := strset.New()
for _, pattern := range patterns { for _, pattern := range patterns {
for entry := range z { for entry := range z {
// We want to match globs as if entries begin with a leading slash (akin to an absolute path) // We want to match globs as if entries begin with a leading slash (akin to an absolute path)
// so that glob logic is consistent inside and outside of ZIP archives // so that glob logic is consistent inside and outside of ZIP archives
normalizedEntry := normalizeZipEntryName(entry) normalizedEntry := normalizeZipEntryName(caseInsensitive, entry)
if caseInsensitive {
pattern = strings.ToLower(pattern)
}
if GlobMatch(pattern, normalizedEntry) { if GlobMatch(pattern, normalizedEntry) {
uniqueMatches.Add(entry) uniqueMatches.Add(entry)
} }
@ -62,7 +65,10 @@ func (z ZipFileManifest) GlobMatch(patterns ...string) []string {
} }
// normalizeZipEntryName takes the given path entry and ensures it is prefixed with "/". // normalizeZipEntryName takes the given path entry and ensures it is prefixed with "/".
func normalizeZipEntryName(entry string) string { func normalizeZipEntryName(caseInsensitive bool, entry string) string {
if caseInsensitive {
entry = strings.ToLower(entry)
}
if !strings.HasPrefix(entry, "/") { if !strings.HasPrefix(entry, "/") {
return "/" + entry return "/" + entry
} }

View File

@ -116,6 +116,10 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
"*/a-file.txt", "*/a-file.txt",
"some-dir/a-file.txt", "some-dir/a-file.txt",
}, },
{
"*/A-file.txt",
"some-dir/a-file.txt",
},
{ {
"**/*.zip", "**/*.zip",
"nested.zip", "nested.zip",
@ -126,7 +130,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
t.Run(tc.glob, func(t *testing.T) { t.Run(tc.glob, func(t *testing.T) {
glob := tc.glob glob := tc.glob
results := z.GlobMatch(glob) results := z.GlobMatch(true, glob)
if len(results) == 1 && results[0] == tc.expected { if len(results) == 1 && results[0] == tc.expected {
return return

View File

@ -20,7 +20,6 @@ func FileNames() []string {
"LICENSE", "LICENSE",
"LICENSE.md", "LICENSE.md",
"LICENSE.markdown", "LICENSE.markdown",
"license.txt",
"LICENSE.txt", "LICENSE.txt",
"LICENSE-2.0.txt", "LICENSE-2.0.txt",
"LICENCE-2.0.txt", "LICENCE-2.0.txt",

View File

@ -150,7 +150,7 @@ func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error)
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages. // discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// search and parse java manifest files // search and parse java manifest files
manifestMatches := j.fileManifest.GlobMatch(manifestGlob) manifestMatches := j.fileManifest.GlobMatch(false, manifestGlob)
if len(manifestMatches) > 1 { if len(manifestMatches) > 1 {
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches) return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)
} else if len(manifestMatches) == 0 { } else if len(manifestMatches) == 0 {
@ -246,8 +246,8 @@ type parsedPomProject struct {
} }
func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (name, version string, licenses []pkg.License) { func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (name, version string, licenses []pkg.License) {
pomPropertyMatches := j.fileManifest.GlobMatch(pomPropertiesGlob) pomPropertyMatches := j.fileManifest.GlobMatch(false, pomPropertiesGlob)
pomMatches := j.fileManifest.GlobMatch(pomXMLGlob) pomMatches := j.fileManifest.GlobMatch(false, pomXMLGlob)
var pomPropertiesObject pkg.PomProperties var pomPropertiesObject pkg.PomProperties
var pomProjectObject parsedPomProject var pomProjectObject parsedPomProject
if len(pomPropertyMatches) == 1 || len(pomMatches) == 1 { if len(pomPropertyMatches) == 1 || len(pomMatches) == 1 {
@ -295,13 +295,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
var pkgs []pkg.Package var pkgs []pkg.Package
// pom.properties // pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(pomPropertiesGlob)) properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
if err != nil { if err != nil {
return nil, err return nil, err
} }
// pom.xml // pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(pomXMLGlob)) projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -340,10 +340,10 @@ func getDigestsFromArchive(archivePath string) ([]file.Digest, error) {
func (j *archiveParser) getLicenseFromFileInArchive() ([]pkg.License, error) { func (j *archiveParser) getLicenseFromFileInArchive() ([]pkg.License, error) {
var fileLicenses []pkg.License var fileLicenses []pkg.License
for _, filename := range licenses.FileNames() { for _, filename := range licenses.FileNames() {
licenseMatches := j.fileManifest.GlobMatch("/META-INF/" + filename) licenseMatches := j.fileManifest.GlobMatch(true, "/META-INF/"+filename)
if len(licenseMatches) == 0 { if len(licenseMatches) == 0 {
// Try the root directory if it's not in META-INF // Try the root directory if it's not in META-INF
licenseMatches = j.fileManifest.GlobMatch("/" + filename) licenseMatches = j.fileManifest.GlobMatch(true, "/"+filename)
} }
if len(licenseMatches) > 0 { if len(licenseMatches) > 0 {
@ -378,7 +378,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) (
// associating each discovered package to the given parent package. // associating each discovered package to the given parent package.
func discoverPkgsFromZip(location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromZip(location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
// search and parse pom.properties files & fetch the contents // search and parse pom.properties files & fetch the contents
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(archiveFormatGlobs...)...) openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err) return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
} }